12.1.4.5 : Le main_mask2.cpp avec des std::for_each imbriqués
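Cette histoire d'indices est un peu ridicule car elle consomme beaucoup de mémoire. Essayons d'imbriquer des std::for_each pour ne plus avoir à utiliser un grand vecteur d'indices : un petit vecteur d'indices par dimension (événements, slices, pixels) suffit.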
On commence avec les includes standards :
1 2 3 4 5 6 7 8 |
#include <iostream> #include <vector> #include <numeric> #include <execution> #include <algorithm> #include "micro_benchmark.h" |
On définit un vecteur d'indices :
1 2 |
///Type of a vector of indices
using VecIndex = std::vector<int>;
On déclare notre kernel :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
///Compute the calibration /** @param[out] vecCalibSignal : vector of calibrated signal * @param tabADC : pointer to the table of ADC values * @param tabPed : pointer to the table of pedestal * @param tabGain : pointer to the table of gain * @param vecEventIdx : vector of index of events * @param vecSliceIdx : vector of index of slices * @param vecPixelIdx : vector of index of pixels */ void compute_calibration(float * vecCalibSignal, const float * tabADC, const float * tabPed, const float* tabGain, const VecIndex & vecEventIdx, const VecIndex & vecSliceIdx, const VecIndex & vecPixelIdx) { size_t nbSlice(vecSliceIdx.size()), nbPixel(vecPixelIdx.size()); size_t eventSize(nbSlice*nbPixel); |
On utilise deux std::for_each pour itérer sur les événements et les slices, et ensuite on appelle notre std::for_each avec des indices (et la macro EXECUTION_POLICY gère toujours la méthode d'exécution à la compilation) :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
std::for_each(std::execution::seq, std::begin(vecEventIdx), std::end(vecEventIdx), [=](int evtIdx){ std::for_each(std::execution::seq, std::begin(vecSliceIdx), std::end(vecSliceIdx), [=](int sliceIdx){ std::for_each(EXECUTION_POLICY, std::begin(vecPixelIdx), std::end(vecPixelIdx), [=](int pixelIdx){ long currentEventSlicePixelIdx = evtIdx*eventSize + sliceIdx*nbPixel + pixelIdx; vecCalibSignal[currentEventSlicePixelIdx] = (tabADC[currentEventSlicePixelIdx] - tabPed[pixelIdx])*tabGain[pixelIdx]; } ); } ); } ); } |
La fonction qui évaluera notre kernel :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
///Get the number of nanoseconds per elements of the Calibration /** @param nbPixel : number of pixels of the tables */ void evaluateCalibration(size_t nbPixel){ //Let's define size of data : size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE); size_t nbValuePerImage(nbSlice*nbPixel); size_t nbElement(nbEvent*nbValuePerImage); std::vector<float> vecGain(nbPixel), vecPedestal(nbPixel); std::fill(vecGain.begin(), vecGain.end(), 0.02f); std::fill(vecPedestal.begin(), vecPedestal.end(), 40.0f); std::vector<float> vecADCSignal(nbElement), vecCalibSignal(nbElement); std::fill(vecADCSignal.begin(), vecADCSignal.end(), 42.0f); VecIndex vecEventIdx(nbEvent), vecSliceIdx(nbSlice), vecPixelIdx(nbPixel); //Init vectors of index for the computation std::iota(vecEventIdx.begin(), vecEventIdx.end(), 0); std::iota(vecSliceIdx.begin(), vecSliceIdx.end(), 0); std::iota(vecPixelIdx.begin(), vecPixelIdx.end(), 0); //We have to create pointer to be able to catch them by copy without losing any time float * tabADC = vecADCSignal.data(), *tabGain = vecGain.data(), *tabPed = vecPedestal.data(), *tabCalibSignal = vecCalibSignal.data(); size_t fullNbElement(nbElement); micro_benchmarkAutoNsPrint("evaluateCalibration", fullNbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, vecEventIdx, vecSliceIdx, vecPixelIdx); } |
Enfin, nous appelons la fonction d'évaluation de MicroBenchmark :
1 2 3 |
///Entry point : forwards the command line to the MicroBenchmark argument parser
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	return micro_benchmarkParseArg(argc, argv, evaluateCalibration);
}
Le fichier main_mask2.cpp complet :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
/*************************************** Auteur : Pierre Aubert Mail : pierre.aubert@lapp.in2p3.fr Licence : CeCILL-C ****************************************/ #include <iostream> #include <vector> #include <numeric> #include <execution> #include <algorithm> #include "micro_benchmark.h" ///Defines a vector of index typedef std::vector<int> VecIndex; ///Compute the calibration /** @param[out] vecCalibSignal : vector of calibrated signal * @param tabADC : pointer to the table of ADC values * @param tabPed : pointer to the table of pedestal * @param tabGain : pointer to the table of gain * @param vecEventIdx : vector of index of events * @param vecSliceIdx : vector of index of slices * @param vecPixelIdx : vector of index of pixels */ void compute_calibration(float * vecCalibSignal, const float * tabADC, const float * tabPed, const float* tabGain, const VecIndex & vecEventIdx, const VecIndex & vecSliceIdx, const VecIndex & vecPixelIdx) { size_t nbSlice(vecSliceIdx.size()), nbPixel(vecPixelIdx.size()); size_t eventSize(nbSlice*nbPixel); std::for_each(std::execution::seq, std::begin(vecEventIdx), std::end(vecEventIdx), [=](int evtIdx){ std::for_each(std::execution::seq, std::begin(vecSliceIdx), std::end(vecSliceIdx), [=](int sliceIdx){ std::for_each(EXECUTION_POLICY, std::begin(vecPixelIdx), std::end(vecPixelIdx), [=](int pixelIdx){ long currentEventSlicePixelIdx = evtIdx*eventSize + sliceIdx*nbPixel + pixelIdx; vecCalibSignal[currentEventSlicePixelIdx] = (tabADC[currentEventSlicePixelIdx] - tabPed[pixelIdx])*tabGain[pixelIdx]; } ); } ); } ); } ///Get the number of nanoseconds per elements of the Calibration /** @param nbPixel : number of pixels of the tables */ void evaluateCalibration(size_t nbPixel){ //Let's define size of data : size_t nbEvent(NB_EVENT), nbSlice(NB_SLICE); size_t nbValuePerImage(nbSlice*nbPixel); size_t nbElement(nbEvent*nbValuePerImage); std::vector<float> vecGain(nbPixel), vecPedestal(nbPixel); std::fill(vecGain.begin(), vecGain.end(), 0.02f); 
std::fill(vecPedestal.begin(), vecPedestal.end(), 40.0f); std::vector<float> vecADCSignal(nbElement), vecCalibSignal(nbElement); std::fill(vecADCSignal.begin(), vecADCSignal.end(), 42.0f); VecIndex vecEventIdx(nbEvent), vecSliceIdx(nbSlice), vecPixelIdx(nbPixel); //Init vectors of index for the computation std::iota(vecEventIdx.begin(), vecEventIdx.end(), 0); std::iota(vecSliceIdx.begin(), vecSliceIdx.end(), 0); std::iota(vecPixelIdx.begin(), vecPixelIdx.end(), 0); //We have to create pointer to be able to catch them by copy without losing any time float * tabADC = vecADCSignal.data(), *tabGain = vecGain.data(), *tabPed = vecPedestal.data(), *tabCalibSignal = vecCalibSignal.data(); size_t fullNbElement(nbElement); micro_benchmarkAutoNsPrint("evaluateCalibration", fullNbElement, compute_calibration, tabCalibSignal, tabADC, tabPed, tabGain, vecEventIdx, vecSliceIdx, vecPixelIdx); } int main(int argc, char** argv){ return micro_benchmarkParseArg(argc, argv, evaluateCalibration); } |