3.4.3.5.3 : Le programme main.cpp
Le fichier main.cpp est simplifié par l'utilisation de MicroBenchmark :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
#include <iostream> #include "micro_benchmark.h" #include "PTensor.h" #include "hadamard.h" ///Get the number of nanoseconds per elements /** @param nbElement : number of elements of the tables */ void evaluateHadamardProduct(size_t nbElement){ //If you use memory allocated with memalign instead of new. Thrust (GPU lib of NVC++) will complain and will tell you : //terminate called after throwing an instance of 'thrust::system::system_error' // what(): transform: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered // Abandon (core dumped) AllocMode::AllocMode allocMode = PHOENIX_AUTO_ALIGNED; //Will be aligned on CPU and standard on GPU (with -DPHOENIX_GPU_MODE=1) PTensor<float> tabX(allocMode, nbElement); PTensor<float> tabY(allocMode, nbElement); PTensor<float> tabRes(allocMode, nbElement); for(size_t i(0lu); i < nbElement; ++i){ tabX[i] = i*19lu%11; tabY[i] = i*27lu%19; } micro_benchmarkAutoNsPrint("evaluate hadamard", nbElement, hadamard_product, tabRes.getData(), tabX.getData(), tabY.getData(), nbElement); } int main(int argc, char** argv){ return micro_benchmarkParseArg(argc, argv, evaluateHadamardProduct); } |
Le fichier main.cpp complet :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
/*************************************** Auteur : Pierre Aubert Mail : pierre.aubert@lapp.in2p3.fr Licence : CeCILL-C ****************************************/ #include <iostream> #include "micro_benchmark.h" #include "PTensor.h" #include "hadamard.h" ///Get the number of nanoseconds per elements /** @param nbElement : number of elements of the tables */ void evaluateHadamardProduct(size_t nbElement){ //If you use memory allocated with memalign instead of new. Thrust (GPU lib of NVC++) will complain and will tell you : //terminate called after throwing an instance of 'thrust::system::system_error' // what(): transform: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered // Abandon (core dumped) AllocMode::AllocMode allocMode = PHOENIX_AUTO_ALIGNED; //Will be aligned on CPU and standard on GPU (with -DPHOENIX_GPU_MODE=1) PTensor<float> tabX(allocMode, nbElement); PTensor<float> tabY(allocMode, nbElement); PTensor<float> tabRes(allocMode, nbElement); for(size_t i(0lu); i < nbElement; ++i){ tabX[i] = i*19lu%11; tabY[i] = i*27lu%19; } micro_benchmarkAutoNsPrint("evaluate hadamard", nbElement, hadamard_product, tabRes.getData(), tabX.getData(), tabY.getData(), nbElement); } int main(int argc, char** argv){ return micro_benchmarkParseArg(argc, argv, evaluateHadamardProduct); } |
Le fichier main.cpp est disponible ici.