Le fichier source

6.1.1.3.2 : Le fichier source

Écrivons le fichier sgemm_vectorize.cpp :

Il nous faut tout d'abord inclure le header qui définit PLIB_VECTOR_SIZE_FLOAT, utilisé pour indiquer au compilateur l'alignement des données de type float :

#include "phoenix_intrinsics.h"

Et string.h pour avoir la fonction memset :

#include <string.h>

Ensuite nous incluons notre header :

#include "sgemm_swap.h"

Et nous implémentons le produit de matrices :

///Single precision product of two square matrices (element (i,j) is stored at index i*size + j)
/**	@param[out] pmatOut : matrix overwritten with the product pmatX times pmatY
 * 	@param pmatX : left operand of the product
 * 	@param pmatY : right operand of the product
 * 	@param size : number of rows (and of columns) of each matrix
 * 
 * 	The three pointers are __restrict__ qualified, so the matrices must not overlap,
 * 	and each of them is passed to __builtin_assume_aligned with PLIB_VECTOR_SIZE_FLOAT
*/
void sgemm_vectorize(float* __restrict__ pmatOut, const float* __restrict__ pmatX, const float* __restrict__ pmatY, long unsigned int size){
	float* matOut = (float*)__builtin_assume_aligned(pmatOut, PLIB_VECTOR_SIZE_FLOAT);
	const float* matX = (const float*)__builtin_assume_aligned(pmatX, PLIB_VECTOR_SIZE_FLOAT);
	const float* matY = (const float*)__builtin_assume_aligned(pmatY, PLIB_VECTOR_SIZE_FLOAT);
	
	//The output is used as an accumulator below, so it has to start from zero
	memset(matOut, 0, sizeof(float)*size*size);
	
	for(long unsigned int row = 0lu; row < size; ++row){
		const long unsigned int rowOffset = row*size;
		//The summation index is the middle loop, so the innermost loop walks contiguous elements of matOut and matY
		for(long unsigned int inner = 0lu; inner < size; ++inner){
			const long unsigned int innerOffset = inner*size;
			for(long unsigned int col = 0lu; col < size; ++col){
				matOut[rowOffset + col] += matX[rowOffset + inner]*matY[innerOffset + col];
			}
		}
	}
}

Le fichier sgemm_vectorize.cpp complet :

/***************************************
	Auteur : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/

#include "phoenix_intrinsics.h"
#include <string.h>
#include "sgemm_swap.h"
///Single precision product of two square matrices (element (i,j) is stored at index i*size + j)
/**	@param[out] pmatOut : matrix overwritten with the product pmatX times pmatY
 * 	@param pmatX : left operand of the product
 * 	@param pmatY : right operand of the product
 * 	@param size : number of rows (and of columns) of each matrix
 * 
 * 	The three pointers are __restrict__ qualified, so the matrices must not overlap,
 * 	and each of them is passed to __builtin_assume_aligned with PLIB_VECTOR_SIZE_FLOAT
*/
void sgemm_vectorize(float* __restrict__ pmatOut, const float* __restrict__ pmatX, const float* __restrict__ pmatY, long unsigned int size){
	float* matOut = (float*)__builtin_assume_aligned(pmatOut, PLIB_VECTOR_SIZE_FLOAT);
	const float* matX = (const float*)__builtin_assume_aligned(pmatX, PLIB_VECTOR_SIZE_FLOAT);
	const float* matY = (const float*)__builtin_assume_aligned(pmatY, PLIB_VECTOR_SIZE_FLOAT);
	
	//The output is used as an accumulator below, so it has to start from zero
	memset(matOut, 0, sizeof(float)*size*size);
	
	for(long unsigned int row = 0lu; row < size; ++row){
		const long unsigned int rowOffset = row*size;
		//The summation index is the middle loop, so the innermost loop walks contiguous elements of matOut and matY
		for(long unsigned int inner = 0lu; inner < size; ++inner){
			const long unsigned int innerOffset = inner*size;
			for(long unsigned int col = 0lu; col < size; ++col){
				matOut[rowOffset + col] += matX[rowOffset + inner]*matY[innerOffset + col];
			}
		}
	}
}

Vous pouvez le télécharger ici.