Le fichier source complet

4.2.1.2.3 : Le fichier source complet

Le fichier vectorized_propagation.cpp complet :
/***************************************
	Auteur : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/
#include <algorithm>
#include "temporary_alloc.h"
#include "vectorized_propagation.h"

///Propagate the U and V species in the matU and matV
/**	@param[out] poutMatU : updated matrix U version
 * 	@param[out] poutMatV : updated matrix V version
 * 	@param pmatU : input of matrix U
 * 	@param pmatV : input of matrix V
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param matDeltaSquare : matrix of the delta square values
 * 	@param nbStencilRow : number of rows of the matrix matDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matDeltaSquare
 * 	@param diffudionRateU : diffusion rate of the U specie
 * 	@param diffudionRateV : diffusion rate of the V specie
 * 	@param feedRate : rate of the process which feeds U and drains U, V and P
 * 	@param killRate : rate of the process which converts V into P
 * 	@param dt : time interval between two steps
*/
void grayscott_propagation(float * __restrict__ poutMatU, float * __restrict__ poutMatV, const float * __restrict__ pmatU, const float * __restrict__ pmatV,
			long nbRow, long nbCol,
			const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
			float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{
	const float* matU = (const float*)__builtin_assume_aligned(pmatU, PLIB_VECTOR_SIZE_BYTE_FLOAT);
	const float* matV = (const float*)__builtin_assume_aligned(pmatV, PLIB_VECTOR_SIZE_BYTE_FLOAT);
	float* outMatU = (float*)__builtin_assume_aligned(poutMatU, PLIB_VECTOR_SIZE_BYTE_FLOAT);
	float* outMatV = (float*)__builtin_assume_aligned(poutMatV, PLIB_VECTOR_SIZE_BYTE_FLOAT);
	long offsetStencilRow((nbStencilRow - 1l)/2l);
	long offsetStencilCol((nbStencilCol - 1l)/2l);
	for(long i(0l); i < nbRow; ++i){
		long firstRowStencil(std::max(i - offsetStencilRow, 0l));
		long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
		for(long j(0l); j < nbCol; ++j){
			long firstColStencil(std::max(j - offsetStencilCol, 0l));
			long lastColStencil(std::min(j + offsetStencilCol + 1l, nbCol));
			long stencilIndexRow(0l);
			float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]);
			float fullU(0.0f), fullV(0.0f);
			for(long k(firstRowStencil); k < lastRowStencil; ++k){
				long stencilIndexCol(0l);
				for(long l(firstColStencil); l < lastColStencil; ++l){
					float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
					fullU += (matU[k*nbCol + l] - u)*deltaSquare;
					fullV += (matV[k*nbCol + l] - v)*deltaSquare;
					++stencilIndexCol;
				}
				++stencilIndexRow;
			}
			float uvSquare(u*v*v);
			float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
			float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
			outMatU[i*nbCol + j] = u + du*dt;
			outMatV[i*nbCol + j] = v + dv*dt;
		}
	}
}
Vous pouvez le télécharger ici.