Le main_classic.cpp classique

12.1.4.2 : Le main_classic.cpp classique

Si nous voulons comparer les implémentations récentes, il nous faut un point de repère avec une bonne vieille vectorisation à l'ancienne.

On commence avec les includes standards :

#include <iostream>
#include <vector>
#include <numeric>

#include <execution>
#include <algorithm>

#include "micro_benchmark.h"

On inclut notre allocateur aligné :

#include "AlignedAllocator.h"

On définit un type de std::vector alignés pour simplifier l'écriture de notre programme :

///Defines a vector of data
typedef std::vector<float, AlignedAllocator<float> > VecData;

On implémente notre kernel (avec des pointeurs et des __builtin_assume_aligned ) :

///Calibrate every ADC sample : subtract the pedestal of its pixel, then multiply by the gain of its pixel
/**	@param[out] ptabCalibSignal : pointer to the table of calibrated signal (nbEvent*nbSlice*nbPixel values are written)
 * 	@param ptabADC : pointer to the table of ADC values (nbEvent*nbSlice*nbPixel values are read)
 * 	@param ptabPed : pointer to the table of pedestal (nbPixel values are read)
 * 	@param ptabGain : pointer to the table of gain (nbPixel values are read)
 * 	@param nbEvent : number of events (the kernel processes nbEvent*nbSlice rows of nbPixel values)
 * 	@param nbSlice : number of slices (see nbEvent)
 * 	@param nbPixel : number of pixels in the camera, i.e. the length of one row
 * 	The four pointers are declared to the compiler as aligned on VECTOR_ALIGNEMENT with __builtin_assume_aligned,
 * 	so the caller has to provide buffers which really have this alignment
*/
void compute_calibration(float * ptabCalibSignal, const float * ptabADC, const float * ptabPed, const float* ptabGain, size_t nbEvent, size_t nbSlice, size_t nbPixel){
	//Alignment promises made to the compiler to help it vectorize the inner loop
	float * const calibrated = static_cast<float*>(__builtin_assume_aligned(ptabCalibSignal, VECTOR_ALIGNEMENT));
	const float * const adc = static_cast<const float*>(__builtin_assume_aligned(ptabADC, VECTOR_ALIGNEMENT));
	const float * const pedestal = static_cast<const float*>(__builtin_assume_aligned(ptabPed, VECTOR_ALIGNEMENT));
	const float * const gain = static_cast<const float*>(__builtin_assume_aligned(ptabGain, VECTOR_ALIGNEMENT));
	
	const size_t rowCount = nbEvent*nbSlice;
	for(size_t row = 0lu; row < rowCount; ++row){
		//Pedestal and gain are indexed by pixel only, so they are reused for every row
		const size_t rowOffset = row*nbPixel;
		for(size_t pixel = 0lu; pixel < nbPixel; ++pixel){
			calibrated[rowOffset + pixel] = (adc[rowOffset + pixel] - pedestal[pixel])*gain[pixel];
		}
	}
}

On écrit la fonction qui évaluera les performances de notre kernel :

///Build a constant data set and benchmark compute_calibration on it
/**	@param nbPixel : number of pixels of the tables
 * 	The data set holds NB_EVENT*NB_SLICE*nbPixel values : every ADC value is 42, every pedestal is 40 and every gain is 0.02
 * 	The timing and the printing of the result are delegated to micro_benchmarkAutoNsPrint
*/
void evaluateCalibration(size_t nbPixel){
	//Let's define size of data :
	size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE);
	size_t nbElement(nbEvent*nbSlice*nbPixel);
	
	//Calibration constants, one value per pixel
	std::vector<float> vecGain(nbPixel, 0.02f), vecPedestal(nbPixel, 40.0f);
	//Input ADC values and output table (the output is value-initialised to zero)
	std::vector<float> vecADCSignal(nbElement, 42.0f), vecCalibSignal(nbElement);
	
	//No vector of index here : the classic kernel loops over the tables itself
	
	//NOTE(review): compute_calibration declares its pointers as aligned on VECTOR_ALIGNEMENT, but these
	//buffers use the default allocator of std::vector. Confirm that this allocator provides the required
	//alignment, or switch the four tables to the aligned VecData type
	
	//We have to create pointer to be able to pass them by copy without losing any time
	float * tabCalibSignal = vecCalibSignal.data();
	float * tabADC = vecADCSignal.data();
	float * tabGain = vecGain.data();
	float * tabPed = vecPedestal.data();
	micro_benchmarkAutoNsPrint("evaluateCalibration classic", nbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, nbEvent, nbSlice, nbPixel);
}

Enfin, nous appelons la fonction d'évaluation de MicroBenchmark :

///Entry point of the program : hands the command line and the evaluation function over to micro_benchmarkParseArg
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	const int exitCode = micro_benchmarkParseArg(argc, argv, evaluateCalibration);
	return exitCode;
}

Le fichier main_classic.cpp complet :

/***************************************
	Auteur : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/
#include <iostream>
#include <vector>
#include <numeric>

#include <execution>
#include <algorithm>

#include "micro_benchmark.h"

#include "AlignedAllocator.h"

///Vector of float whose storage is provided by AlignedAllocator
using VecData = std::vector<float, AlignedAllocator<float> >;

///Calibrate every ADC sample : subtract the pedestal of its pixel, then multiply by the gain of its pixel
/**	@param[out] ptabCalibSignal : pointer to the table of calibrated signal (nbEvent*nbSlice*nbPixel values are written)
 * 	@param ptabADC : pointer to the table of ADC values (nbEvent*nbSlice*nbPixel values are read)
 * 	@param ptabPed : pointer to the table of pedestal (nbPixel values are read)
 * 	@param ptabGain : pointer to the table of gain (nbPixel values are read)
 * 	@param nbEvent : number of events (the kernel processes nbEvent*nbSlice rows of nbPixel values)
 * 	@param nbSlice : number of slices (see nbEvent)
 * 	@param nbPixel : number of pixels in the camera, i.e. the length of one row
 * 	The four pointers are declared to the compiler as aligned on VECTOR_ALIGNEMENT with __builtin_assume_aligned,
 * 	so the caller has to provide buffers which really have this alignment
*/
void compute_calibration(float * ptabCalibSignal, const float * ptabADC, const float * ptabPed, const float* ptabGain, size_t nbEvent, size_t nbSlice, size_t nbPixel){
	//Alignment promises made to the compiler to help it vectorize the inner loop
	float * const calibrated = static_cast<float*>(__builtin_assume_aligned(ptabCalibSignal, VECTOR_ALIGNEMENT));
	const float * const adc = static_cast<const float*>(__builtin_assume_aligned(ptabADC, VECTOR_ALIGNEMENT));
	const float * const pedestal = static_cast<const float*>(__builtin_assume_aligned(ptabPed, VECTOR_ALIGNEMENT));
	const float * const gain = static_cast<const float*>(__builtin_assume_aligned(ptabGain, VECTOR_ALIGNEMENT));
	
	const size_t rowCount = nbEvent*nbSlice;
	for(size_t row = 0lu; row < rowCount; ++row){
		//Pedestal and gain are indexed by pixel only, so they are reused for every row
		const size_t rowOffset = row*nbPixel;
		for(size_t pixel = 0lu; pixel < nbPixel; ++pixel){
			calibrated[rowOffset + pixel] = (adc[rowOffset + pixel] - pedestal[pixel])*gain[pixel];
		}
	}
}

///Build a constant data set and benchmark compute_calibration on it
/**	@param nbPixel : number of pixels of the tables
 * 	The data set holds NB_EVENT*NB_SLICE*nbPixel values : every ADC value is 42, every pedestal is 40 and every gain is 0.02
 * 	The timing and the printing of the result are delegated to micro_benchmarkAutoNsPrint
*/
void evaluateCalibration(size_t nbPixel){
	//Let's define size of data :
	size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE);
	size_t nbElement(nbEvent*nbSlice*nbPixel);
	
	//Calibration constants, one value per pixel
	std::vector<float> vecGain(nbPixel, 0.02f), vecPedestal(nbPixel, 40.0f);
	//Input ADC values and output table (the output is value-initialised to zero)
	std::vector<float> vecADCSignal(nbElement, 42.0f), vecCalibSignal(nbElement);
	
	//No vector of index here : the classic kernel loops over the tables itself
	
	//NOTE(review): compute_calibration declares its pointers as aligned on VECTOR_ALIGNEMENT, but these
	//buffers use the default allocator of std::vector. Confirm that this allocator provides the required
	//alignment, or switch the four tables to the aligned VecData type
	
	//We have to create pointer to be able to pass them by copy without losing any time
	float * tabCalibSignal = vecCalibSignal.data();
	float * tabADC = vecADCSignal.data();
	float * tabGain = vecGain.data();
	float * tabPed = vecPedestal.data();
	micro_benchmarkAutoNsPrint("evaluateCalibration classic", nbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, nbEvent, nbSlice, nbPixel);
}

///Entry point of the program : hands the command line and the evaluation function over to micro_benchmarkParseArg
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	const int exitCode = micro_benchmarkParseArg(argc, argv, evaluateCalibration);
	return exitCode;
}

Le fichier main_classic.cpp est disponible ici.