4.3.2.2.3 : Le fichier source complet


Le fichier intrinsics_propagation.cpp complet :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/***************************************
	Auteur : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/
#include "phoenix_intrinsics.h"
#include <algorithm>
#include "intrinsics_propagation.h"

///Propagate the U and V species in the matVecVecU and matVecV
/**	@param[out] outMatVecU : updated matrix U version (with vectorial neighbours)
 * 	@param[out] outMatVecV : updated matrix V version (with vectorial neighbours)
 * 	@param matVecVecU : input of matrix U (with vectorial neighbours)
 * 	@param matVecV : input of matrix V (with vectorial neighbours)
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param matBroadcastDeltaSquare : matrix of the delta square values (with broadcast neighbours)
 * 	@param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare
 * 	@param diffusionRateU : diffusion rate of the U specie
 * 	@param diffudionRateV : diffusion rate of the V specie
 * 	@param feedRate : rate of the process which feeds U and drains U, V and P
 * 	@param killRate : rate of the process which converts V into P
 * 	@param dt : time interval between two steps
*/
void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol,
		       const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol,
		       float diffusionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{
	long offsetStencilRow((nbStencilRow - 1l)/2l);
	long offsetStencilCol((nbStencilCol - 1l)/2l);
	long nbVecCol(nbCol/PLIB_VECTOR_SIZE_FLOAT);
	PRegVecf vecOne(plib_broadcast_ss(1.0f));
	PRegVecf vecFeedRate(plib_broadcast_ss(feedRate));
	PRegVecf vecKillRate(plib_broadcast_ss(killRate));
	PRegVecf vecDiffudionRateU(plib_broadcast_ss(diffusionRateU));
	PRegVecf vecDiffudionRateV(plib_broadcast_ss(diffusionRateV));
	PRegVecf vecDt(plib_broadcast_ss(dt));
	for(long i(1l); i < nbRow - 1l; ++i){
		long firstRowStencil(std::max(i - offsetStencilRow, 0l));
		long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
		for(long j(0l); j < nbVecCol; ++j){
			long firstColStencil(std::max(j - offsetStencilCol, 0l));
			long lastColStencil(std::min(j + offsetStencilCol + 1l, nbVecCol));
			long stencilIndexRow(0l);
			
// 			float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]);
// 			float fullU(0.0f), fullV(0.0f);
			
			PRegVecf vecU(plib_load_ps(matVecVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
			PRegVecf vecV(plib_load_ps(matVecVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
			
			PRegVecf vecFullU(plib_broadcast_ss(0.0f)), vecFullV(plib_broadcast_ss(0.0f));
			for(long k(firstRowStencil); k < lastRowStencil; ++k){
				long stencilIndexCol(0l);
				for(long l(firstColStencil); l < lastColStencil; ++l){
// 					float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
					PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare +
									(stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT));
					
					PRegVecf vecKLU(plib_load_ps(matVecVecU + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
					PRegVecf vecKLV(plib_load_ps(matVecVecV + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
					
					PRegVecf vecKLUminU(plib_sub_ps(vecKLU, vecU));
					PRegVecf vecKLVminV(plib_sub_ps(vecKLV, vecV));
					
					PRegVecf vecKLUminUdMultDeltaSquare(plib_mul_ps(vecKLUminU, vecDeltaSquare));
					PRegVecf vecKLVminVdMultDeltaSquare(plib_mul_ps(vecKLVminV, vecDeltaSquare));
					
					vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare);
					vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare);
					
// 					fullU += (matU[k*nbCol + l] - u)*deltaSquare;
// 					fullV += (matV[k*nbCol + l] - v)*deltaSquare;
					++stencilIndexCol;
				}
				++stencilIndexRow;
			}
// 			float uvSquare(u*v*v);
			PRegVecf vecUVSquare(plib_mul_ps(vecU, plib_mul_ps(vecV, vecV)));
			
			PRegVecf vecOneMinusU(plib_sub_ps(vecOne, vecU));
			PRegVecf vecFeedPlusKill(plib_add_ps(vecFeedRate, vecKillRate));
			
			PRegVecf vecDiffFullU(plib_mul_ps(vecDiffudionRateU, vecFullU));
			PRegVecf vecDiffFullV(plib_mul_ps(vecDiffudionRateV, vecFullV));
			
			PRegVecf vecFeedRateMultOneMinusU(plib_mul_ps(vecFeedRate, vecOneMinusU));
			PRegVecf vecFeedPlusKillMultV(plib_mul_ps(vecFeedPlusKill, vecV));
			
			PRegVecf vecDiffFullUMinusUVSquare(plib_sub_ps(vecDiffFullU, vecUVSquare));
			PRegVecf vecDiffFullVPlusUVSquare(plib_add_ps(vecDiffFullV, vecUVSquare));
			
			PRegVecf vecDu(plib_add_ps(vecDiffFullUMinusUVSquare, vecFeedRateMultOneMinusU));
			PRegVecf vecDv(plib_sub_ps(vecDiffFullVPlusUVSquare, vecFeedPlusKillMultV));
			
// 			float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
// 			float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
			PRegVecf vecDuDt(plib_mul_ps(vecDu, vecDt));
			PRegVecf vecDvDt(plib_mul_ps(vecDv, vecDt));
			
			PRegVecf vecUPlusDuDt(plib_add_ps(vecU, vecDuDt));
			PRegVecf vecVPlusDvDt(plib_add_ps(vecV, vecDvDt));
			
			plib_store_ps(outMatVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecUPlusDuDt);
			plib_store_ps(outMatVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecVPlusDvDt);
			
// 			outMatVecU[i*nbCol + j] = u + du*dt;
// 			outMatVecV[i*nbCol + j] = v + dv*dt;
		}
	}
}


Vous pouvez le télécharger ici.