12.1.4.2 : Le main_classic.cpp classique
Si nous voulons comparer les implémentations récentes, il nous faut un point de repère : une bonne vieille vectorisation à l'ancienne.
On commence avec les includes standards :
1 2 3 4 5 6 7 8 |
#include <iostream> #include <vector> #include <numeric> #include <execution> #include <algorithm> #include "micro_benchmark.h" |
On inclut notre allocateur aligné :
1 |
#include "AlignedAllocator.h"
|
On définit un type de std::vector aligné pour simplifier l'écriture de notre programme :
1 2 |
///Defines a vector of data typedef std::vector<float, AlignedAllocator<float> > VecData; |
On implémente notre kernel (avec des pointeurs et des __builtin_assume_aligned ) :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
///Compute the calibration /** @param[out] ptabCalibSignal : pointer to the table of calibrated signal * @param ptabADC : pointer to the table of ADC values * @param ptabPed : pointer to the table of pedestal * @param ptabGain : pointer to the table of gain * @param nbEvent : vector of index to be used to round robin over the pixel * @param nbPixel : number of pixels in the camera */ void compute_calibration(float * ptabCalibSignal, const float * ptabADC, const float * ptabPed, const float* ptabGain, size_t nbEvent, size_t nbSlice, size_t nbPixel){ float * tabCalibSignal = (float*)__builtin_assume_aligned(ptabCalibSignal, VECTOR_ALIGNEMENT); const float * tabADC = (const float*)__builtin_assume_aligned(ptabADC, VECTOR_ALIGNEMENT); const float * tabPed = (const float*)__builtin_assume_aligned(ptabPed, VECTOR_ALIGNEMENT); const float * tabGain = (const float*)__builtin_assume_aligned(ptabGain, VECTOR_ALIGNEMENT); size_t nbRow(nbEvent*nbSlice); for(size_t i(0lu); i < nbRow; ++i){ for(size_t j(0lu); j < nbPixel; ++j){ tabCalibSignal[i*nbPixel + j] = (tabADC[i*nbPixel + j] - tabPed[j])*tabGain[j]; } } } |
On écrit la fonction qui évaluera les performances de notre kernel :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
///Get the number of nanoseconds per elements of the Calibration /** @param nbPixel : number of pixels of the tables */ void evaluateCalibration(size_t nbPixel){ //Let's define size of data : size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE); size_t nbElement(nbEvent*nbSlice*nbPixel); std::vector<float> vecGain(nbPixel), vecPedestal(nbPixel); std::fill(vecGain.begin(), vecGain.end(), 0.02f); std::fill(vecPedestal.begin(), vecPedestal.end(), 40.0f); std::vector<float> vecADCSignal(nbElement), vecCalibSignal(nbElement); std::fill(vecADCSignal.begin(), vecADCSignal.end(), 42.0f); std::vector<int> vecIdx(nbElement); //Init vector of index for the computation std::iota(vecIdx.begin(), vecIdx.end(), 0); //Hope some day views will work to avoid allocation of index vector //We have to create pointer to be able to catch them by copy without losing any time float *tabCalibSignal = vecCalibSignal.data(), *tabADC = vecADCSignal.data(), *tabGain = vecGain.data(), *tabPed = vecPedestal.data(); size_t fullNbElement(nbElement); micro_benchmarkAutoNsPrint("evaluateCalibration classic", fullNbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, nbEvent, nbSlice, nbPixel); } |
Enfin, nous appelons la fonction d'évaluation de MicroBenchmark :
1 2 3 |
///Entry point of the classic calibration benchmark
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	//The arguments are handed to micro_benchmarkParseArg together with the function to evaluate
	const int exitCode = micro_benchmarkParseArg(argc, argv, evaluateCalibration);
	return exitCode;
}
Le fichier main_classic.cpp complet :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
/*************************************** Auteur : Pierre Aubert Mail : pierre.aubert@lapp.in2p3.fr Licence : CeCILL-C ****************************************/ #include <iostream> #include <vector> #include <numeric> #include <execution> #include <algorithm> #include "micro_benchmark.h" #include "AlignedAllocator.h" ///Defines a vector of data typedef std::vector<float, AlignedAllocator<float> > VecData; ///Compute the calibration /** @param[out] ptabCalibSignal : pointer to the table of calibrated signal * @param ptabADC : pointer to the table of ADC values * @param ptabPed : pointer to the table of pedestal * @param ptabGain : pointer to the table of gain * @param nbEvent : vector of index to be used to round robin over the pixel * @param nbPixel : number of pixels in the camera */ void compute_calibration(float * ptabCalibSignal, const float * ptabADC, const float * ptabPed, const float* ptabGain, size_t nbEvent, size_t nbSlice, size_t nbPixel){ float * tabCalibSignal = (float*)__builtin_assume_aligned(ptabCalibSignal, VECTOR_ALIGNEMENT); const float * tabADC = (const float*)__builtin_assume_aligned(ptabADC, VECTOR_ALIGNEMENT); const float * tabPed = (const float*)__builtin_assume_aligned(ptabPed, VECTOR_ALIGNEMENT); const float * tabGain = (const float*)__builtin_assume_aligned(ptabGain, VECTOR_ALIGNEMENT); size_t nbRow(nbEvent*nbSlice); for(size_t i(0lu); i < nbRow; ++i){ for(size_t j(0lu); j < nbPixel; ++j){ tabCalibSignal[i*nbPixel + j] = (tabADC[i*nbPixel + j] - tabPed[j])*tabGain[j]; } } } ///Get the number of nanoseconds per elements of the Calibration /** @param nbPixel : number of pixels of the tables */ void evaluateCalibration(size_t nbPixel){ //Let's define size of data : size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE); size_t nbElement(nbEvent*nbSlice*nbPixel); std::vector<float> vecGain(nbPixel), vecPedestal(nbPixel); std::fill(vecGain.begin(), vecGain.end(), 0.02f); std::fill(vecPedestal.begin(), vecPedestal.end(), 40.0f); std::vector<float> 
vecADCSignal(nbElement), vecCalibSignal(nbElement); std::fill(vecADCSignal.begin(), vecADCSignal.end(), 42.0f); std::vector<int> vecIdx(nbElement); //Init vector of index for the computation std::iota(vecIdx.begin(), vecIdx.end(), 0); //Hope some day views will work to avoid allocation of index vector //We have to create pointer to be able to catch them by copy without losing any time float *tabCalibSignal = vecCalibSignal.data(), *tabADC = vecADCSignal.data(), *tabGain = vecGain.data(), *tabPed = vecPedestal.data(); size_t fullNbElement(nbElement); micro_benchmarkAutoNsPrint("evaluateCalibration classic", fullNbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, nbEvent, nbSlice, nbPixel); } int main(int argc, char** argv){ return micro_benchmarkParseArg(argc, argv, evaluateCalibration); } |