6.1.4 : CMakeLists.txt
Écrivons le fichier CMakeLists.txt :
On commence par définir le projet :
1 2 |
# cmake_minimum_required() has to run before project(): it sets the policy
# defaults that project() depends on.
cmake_minimum_required(VERSION 3.0)
project(PERFORMANCE_WITH_NAN)
Ensuite, on inclut les macros que l'on a déjà développées pour le produit de Hadamard :
1 |
# Load multiplePerfTest.cmake from the Hadamard product example, located
# relative to the top-level source directory (CMAKE_SOURCE_DIR).
# Presumably this module provides the multiplePerfTest*/phoenix_plotPerf*
# commands called later in this file -- TODO confirm.
include(${CMAKE_SOURCE_DIR}/Examples/1-HadamardProduct/multiplePerfTest.cmake) |
On définit les différentes tailles de vecteurs qui seront les différents points de nos graphes (attention à avoir des multiples de 8 pour que les tests en fonctions intrinsèques se passent bien) :
1 |
# Comma-separated sizes passed to every performance test call in this file;
# each size is one point on the resulting plots. All values are multiples of 8,
# which the intrinsics tests need.
set(CONFIG_SGEMM "16, 32, 64, 112") |
Définissons ce qu'est un NaN (qui se signale) :
1 |
# C++ expression for a signaling NaN of type float, stored as text and handed
# to the multiplePerfTestValueLogX calls of the NaN tests.
set(NAN_DEF "std::numeric_limits<float>::signaling_NaN()") |
Définissons ce qu'est un infini :
1 |
# C++ expression for positive infinity of type float, stored as text and handed
# to the multiplePerfTestValueLogX calls of the Inf tests.
set(INF_DEF "std::numeric_limits<float>::infinity()") |
Définissons un nombre dénormalisé :
1 |
# C++ expression for the smallest positive denormalized (subnormal) float,
# stored as text and handed to the denorm and denormDaz test calls.
set(DENORM_DEF "std::numeric_limits<float>::denorm_min()") |
Testons avec le plus petit nombre normalisé positif (si on le multiplie par lui-même, le résultat ne sera plus un nombre normalisé) :
1 |
# C++ expression for the smallest positive normalized float, stored as text and
# handed to the make_denorm test calls (inputs are normal, products are not).
set(MIN_TO_DENORM_DEF "std::numeric_limits<float>::min()") |
Voici les quatre listes de sources de base pour tous les tests de performances de cette partie :
1 2 3 4 |
# Source lists for the four SGEMM variants. Each list pairs one
# sgemm_<variant>.cpp file with the common main_sgemm.cpp.
set(SGEMM_BASE_SRC
  sgemm_base.cpp
  main_sgemm.cpp
)
set(SGEMM_SWAP_SRC
  sgemm_swap.cpp
  main_sgemm.cpp
)
set(SGEMM_VECTORIZE_SRC
  sgemm_vectorize.cpp
  main_sgemm.cpp
)
set(SGEMM_INTRINSICS_SRC
  sgemm_intrinsics.cpp
  main_sgemm.cpp
)
Créons les tests de référence :
1 2 3 4 |
# Reference performance tests, one call per SGEMM variant.
# The base and swap variants pass an empty extra-option string; the vectorize
# and intrinsics variants pass ${VECTORIZED_OPTION}, which is not defined in
# this file (presumably set by the included module or a parent scope --
# TODO confirm).
multiplePerfTestLogX("sgemmBase" sgemm base
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestLogX("sgemmSwap" sgemm swap
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestLogX("sgemmVectorize" sgemm vectorize
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestLogX("sgemmIntrinsics" sgemm intrinsics
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Nous pouvons également ajouter des graphes (mais il faut faire attention aux noms des programmes que l'on compare) :
1 2 |
# Comparison plots across the four SGEMM variants, one plot per optimisation
# level. The sgemm_<variant>_O3 / _Ofast names are presumably the programs
# generated by the multiplePerfTestLogX calls -- verify against the macro.
phoenix_plotPerfLogX("cmpSgemmSafe03"
  sgemm_base_O3
  sgemm_swap_O3
  sgemm_vectorize_O3
  sgemm_intrinsics_O3
)
phoenix_plotPerfLogX("cmpSgemmSafe0fast"
  sgemm_base_Ofast
  sgemm_swap_Ofast
  sgemm_vectorize_Ofast
  sgemm_intrinsics_Ofast
)
Testons avec des NaN :
1 2 3 4 5 6 7 8 9 10 |
# NaN tests at -O3: every variant receives ${NAN_DEF} as its value expression.
# Only the vectorize and intrinsics variants add ${VECTORIZED_OPTION}.
multiplePerfTestValueLogX("sgemmBaseNanO3" sgemm base nan -O3
  "${NAN_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestValueLogX("sgemmSwapNanO3" sgemm swap nan -O3
  "${NAN_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestValueLogX("sgemmVectorizeNanO3" sgemm vectorize nan -O3
  "${NAN_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestValueLogX("sgemmIntrinsicsNanO3" sgemm intrinsics nan -O3
  "${NAN_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Testons avec des Inf :
1 2 3 4 5 6 7 8 9 10 |
# Infinity tests at -O3: every variant receives ${INF_DEF} as its value
# expression. Only the vectorize and intrinsics variants add
# ${VECTORIZED_OPTION}.
multiplePerfTestValueLogX("sgemmBaseInfO3" sgemm base inf -O3
  "${INF_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestValueLogX("sgemmSwapInfO3" sgemm swap inf -O3
  "${INF_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestValueLogX("sgemmVectorizeInfO3" sgemm vectorize inf -O3
  "${INF_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestValueLogX("sgemmIntrinsicsInfO3" sgemm intrinsics inf -O3
  "${INF_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Testons avec des nombres dénormalisés :
1 2 3 4 5 6 7 8 9 10 |
# Denormalized-input tests at -O3: every variant receives ${DENORM_DEF} as its
# value expression.
# NOTE(review): the last test is named "sgemmIntrinicsDenormO3" ("Intrinics"),
# unlike the "Intrinsics" spelling of the NaN/Inf tests. The name is kept
# unchanged because other build targets or plots may refer to it.
multiplePerfTestValueLogX("sgemmBaseDenormO3" sgemm base denorm -O3
  "${DENORM_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestValueLogX("sgemmSwapDenormO3" sgemm swap denorm -O3
  "${DENORM_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestValueLogX("sgemmVectorizeDenormO3" sgemm vectorize denorm -O3
  "${DENORM_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestValueLogX("sgemmIntrinicsDenormO3" sgemm intrinsics denorm -O3
  "${DENORM_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Essayons de régler le problème avec des nombres dénormalisés :
1 2 3 4 5 6 7 8 9 10 |
# Denormalized-input tests at -O3 with extra flags intended to work around the
# denormal slowdown.
# NOTE(review): base and swap pass "-mfpmath=sse", while vectorize and
# intrinsics pass "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}".
# DENORM_DAZ_OPTION is not defined in this file -- confirm where it is set and
# whether the asymmetry is intentional.
# NOTE(review): "sgemmIntrinicsDenormDazO3" keeps the original "Intrinics"
# spelling on purpose.
multiplePerfTestValueLogX("sgemmBaseDenormDazO3" sgemm base denormDaz -O3
  "${DENORM_DEF}"
  "-mfpmath=sse"
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestValueLogX("sgemmSwapDenormDazO3" sgemm swap denormDaz -O3
  "${DENORM_DEF}"
  "-mfpmath=sse"
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestValueLogX("sgemmVectorizeDenormDazO3" sgemm vectorize denormDaz -O3
  "${DENORM_DEF}"
  "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestValueLogX("sgemmIntrinicsDenormDazO3" sgemm intrinsics denormDaz -O3
  "${DENORM_DEF}"
  "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Voyons ce qu'il se passe lorsque le calcul produit des nombres dénormalisés :
1 2 3 4 5 6 7 8 9 10 |
# Tests at -O3 where the computation itself leaves the normalized range: every
# variant receives ${MIN_TO_DENORM_DEF} as its value expression.
# NOTE(review): "sgemmIntrinicsMakeDenormO3" keeps the original "Intrinics"
# spelling on purpose.
multiplePerfTestValueLogX("sgemmBaseMakeDenormO3" sgemm base make_denorm -O3
  "${MIN_TO_DENORM_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_BASE_SRC}
)
multiplePerfTestValueLogX("sgemmSwapMakeDenormO3" sgemm swap make_denorm -O3
  "${MIN_TO_DENORM_DEF}"
  ""
  "${CONFIG_SGEMM}"
  ${SGEMM_SWAP_SRC}
)
multiplePerfTestValueLogX("sgemmVectorizeMakeDenormO3" sgemm vectorize make_denorm -O3
  "${MIN_TO_DENORM_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_VECTORIZE_SRC}
)
multiplePerfTestValueLogX("sgemmIntrinicsMakeDenormO3" sgemm intrinsics make_denorm -O3
  "${MIN_TO_DENORM_DEF}"
  "${VECTORIZED_OPTION}"
  "${CONFIG_SGEMM}"
  ${SGEMM_INTRINSICS_SRC}
)
Le fichier CMakeLists.txt complet :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# Performance tests of SGEMM variants fed with special floating-point values
# (NaN, infinity, denormalized numbers).

# cmake_minimum_required() has to run before project(): it sets the policy
# defaults that project() depends on.
cmake_minimum_required(VERSION 3.0)
project(PERFORMANCE_WITH_NAN)

# Module from the Hadamard product example; presumably provides the
# multiplePerfTest*/phoenix_plotPerf* commands used below -- TODO confirm.
include(${CMAKE_SOURCE_DIR}/Examples/1-HadamardProduct/multiplePerfTest.cmake)

# Sizes used as plot points; all multiples of 8, as the intrinsics tests need.
set(CONFIG_SGEMM "16, 32, 64, 112")

# C++ expressions (kept as text) for the special values under test.
set(NAN_DEF "std::numeric_limits<float>::signaling_NaN()")
set(INF_DEF "std::numeric_limits<float>::infinity()")
set(DENORM_DEF "std::numeric_limits<float>::denorm_min()")
set(MIN_TO_DENORM_DEF "std::numeric_limits<float>::min()")

# One source list per SGEMM variant, each with the common main_sgemm.cpp.
set(SGEMM_BASE_SRC sgemm_base.cpp main_sgemm.cpp)
set(SGEMM_SWAP_SRC sgemm_swap.cpp main_sgemm.cpp)
set(SGEMM_VECTORIZE_SRC sgemm_vectorize.cpp main_sgemm.cpp)
set(SGEMM_INTRINSICS_SRC sgemm_intrinsics.cpp main_sgemm.cpp)

# Reference tests.
multiplePerfTestLogX("sgemmBase" sgemm base
  "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestLogX("sgemmSwap" sgemm swap
  "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestLogX("sgemmVectorize" sgemm vectorize
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestLogX("sgemmIntrinsics" sgemm intrinsics
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Comparison plots of the reference tests, one per optimisation level.
phoenix_plotPerfLogX("cmpSgemmSafe03"
  sgemm_base_O3 sgemm_swap_O3 sgemm_vectorize_O3 sgemm_intrinsics_O3)
phoenix_plotPerfLogX("cmpSgemmSafe0fast"
  sgemm_base_Ofast sgemm_swap_Ofast sgemm_vectorize_Ofast sgemm_intrinsics_Ofast)

# NaN tests.
multiplePerfTestValueLogX("sgemmBaseNanO3" sgemm base nan -O3 "${NAN_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapNanO3" sgemm swap nan -O3 "${NAN_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestValueLogX("sgemmVectorizeNanO3" sgemm vectorize nan -O3 "${NAN_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestValueLogX("sgemmIntrinsicsNanO3" sgemm intrinsics nan -O3 "${NAN_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Infinity tests.
multiplePerfTestValueLogX("sgemmBaseInfO3" sgemm base inf -O3 "${INF_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapInfO3" sgemm swap inf -O3 "${INF_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestValueLogX("sgemmVectorizeInfO3" sgemm vectorize inf -O3 "${INF_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestValueLogX("sgemmIntrinsicsInfO3" sgemm intrinsics inf -O3 "${INF_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Denormalized-input tests.
# NOTE(review): the "Intrinics" spelling in the test names below is original
# and kept unchanged, since other targets or plots may refer to these names.
multiplePerfTestValueLogX("sgemmBaseDenormO3" sgemm base denorm -O3 "${DENORM_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormO3" sgemm swap denorm -O3 "${DENORM_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestValueLogX("sgemmVectorizeDenormO3" sgemm vectorize denorm -O3 "${DENORM_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestValueLogX("sgemmIntrinicsDenormO3" sgemm intrinsics denorm -O3 "${DENORM_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Denormalized-input tests with extra flags meant to avoid the slowdown.
# NOTE(review): DENORM_DAZ_OPTION is not defined in this file, and base/swap
# use "-mfpmath=sse" instead -- confirm this asymmetry is intentional.
multiplePerfTestValueLogX("sgemmBaseDenormDazO3" sgemm base denormDaz -O3 "${DENORM_DEF}"
  "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormDazO3" sgemm swap denormDaz -O3 "${DENORM_DEF}"
  "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestValueLogX("sgemmVectorizeDenormDazO3" sgemm vectorize denormDaz -O3 "${DENORM_DEF}"
  "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestValueLogX("sgemmIntrinicsDenormDazO3" sgemm intrinsics denormDaz -O3 "${DENORM_DEF}"
  "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Tests where the computation itself leaves the normalized range.
multiplePerfTestValueLogX("sgemmBaseMakeDenormO3" sgemm base make_denorm -O3 "${MIN_TO_DENORM_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapMakeDenormO3" sgemm swap make_denorm -O3 "${MIN_TO_DENORM_DEF}"
  "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestValueLogX("sgemmVectorizeMakeDenormO3" sgemm vectorize make_denorm -O3 "${MIN_TO_DENORM_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestValueLogX("sgemmIntrinicsMakeDenormO3" sgemm intrinsics make_denorm -O3 "${MIN_TO_DENORM_DEF}"
  "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})
Vous pouvez le télécharger ici.
Il n'a fallu que quelques lignes pour créer tous les tests dont nous avons besoin.