/***************************************
	Author : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/

#ifndef __MICRO_BENCHMARK_NS_IMPL_H__
#define __MICRO_BENCHMARK_NS_IMPL_H__

#include <cmath>
#include "micro_benchmark_ns.h"

///Minimum time for a performance test in ns
#define PHOENIX_MINIMUM_TIME_NS 1000000000.0

///Prevent the compiler from optimising a loop away
/** @param value : reference to a value
*/
template <class T>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(T const& value) {
	asm volatile("" : : "r,m"(value) : "memory");
}

///Prevent the compiler from optimising a loop away
/** @param value : reference to a value
*/
template <class T>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(T & value) {
#if defined(__clang__)
	asm volatile("" : "+r,m"(value) : : "memory");
#else
	asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
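
///Usage sketch : phoenix_doNotOptimize marks a value as observed, so the
///optimiser cannot delete the computation that produced it. The function
///sumTable below is hypothetical, only meant to illustrate the pattern.
/** @code
 * int sumTable(const int* table, size_t nbElement){
 * 	int acc(0);
 * 	for(size_t i(0lu); i < nbElement; ++i){
 * 		acc += table[i];
 * 		phoenix_doNotOptimize(acc);	//acc now counts as used
 * 	}
 * 	return acc;
 * }
 * @endcode
*/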

///Do the micro benchmarking of a given function and give the performance results in ns
/** @param[out] ellapsedTimeNs : elapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * @param nbTestPerf : number of performance tests
 * @param nbCallPerTest : number of calls per performance test
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, size_t nbTestPerf, size_t nbCallPerTest,
		_Callable&& __f, _Args&&... __args)
{
	VecEllapsedTime vecTimeNs;
	int res = 0;
	::phoenix_doNotOptimize(res);
	for(size_t i(0lu); i < nbTestPerf; ++i){
		//Starting the timer
		HiPeTime beginTime = phoenix_getTime();
		for(size_t j(0lu); j < nbCallPerTest; ++j){
			::phoenix_doNotOptimize(res);
			__f(__args...);
		}
		//Get the time of the nbCallPerTest calls
		NanoSecs elapsedTime(phoenix_getTime() - beginTime);
		double fullNs(elapsedTime.count()/((double)nbCallPerTest));
		vecTimeNs.push_back(fullNs);
	}
	MapOrderedTime mapOrderTime;
	micro_benchmarkVecToMap(mapOrderTime, vecTimeNs);
	//Use about 70% of the measured tests for the statistics
	size_t nbValueUsed(vecTimeNs.size()*0.7 + 1lu);
	if(nbValueUsed > vecTimeNs.size()){
		nbValueUsed = vecTimeNs.size();
	}
	micro_benchmarkComputeTime(ellapsedTimeNs, ellapsedTimeErrorNs, mapOrderTime, nbValueUsed);
}
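
///Usage sketch for micro_benchmarkNs : the lambda below is a hypothetical
///workload, only meant to show the call. The mean time and its error are
///computed on about 70% of the nbTestPerf measurements (see nbValueUsed above).
/** @code
 * double timeNs(0.0), timeErrorNs(0.0);
 * std::vector<float> v(1000lu, 1.0f);
 * //100 tests of 1000 calls each
 * micro_benchmarkNs(timeNs, timeErrorNs, 100lu, 1000lu, [&v](){
 * 	float s(0.0f);
 * 	for(float x : v){ s += x; }
 * 	phoenix_doNotOptimize(s);
 * });
 * @endcode
*/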

///Do the micro benchmarking of a given function and give the performance results in ns
/** @param[out] ellapsedTimeNs : elapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
 * This function tries to find a relevant performance measurement automatically
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, _Callable&& __f, _Args&&... __args){
	size_t nbTestPerf(100lu), nbCallPerTest(10lu);
	//Let's try with the default values
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
// 	while(std::isnan(ellapsedTimeNs)){
// 		std::cout << "micro_benchmarkAutoNs : ellapsedTimeNs is NaN! Adjusting nbTestPerf = " << nbTestPerf << std::endl;
// 		nbCallPerTest += 5lu;
// 		micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
// 		std::cout << "micro_benchmarkAutoNs : nbTestPerf = " << nbTestPerf << ", nbCallPerTest = " << nbCallPerTest << std::endl;
// 	}
	double fullEllapsedTime(ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest));
	//Check if the total time is more than one second
	while(fullEllapsedTime < PHOENIX_MINIMUM_TIME_NS && nbCallPerTest < 1000000000lu && nbCallPerTest > 0lu){	//Let's try again if the total time is less than one second
		//If the total time is less than one second, we change nbCallPerTest to make it about one second
		double ratioTime((1.3*PHOENIX_MINIMUM_TIME_NS)/fullEllapsedTime);
		if(ratioTime < 1.2){
			ratioTime = 1.2;
		}
// 		double ratioSqrt(std::sqrt(ratioTime) + 1.0);
		//Let's modify the call
// 		nbTestPerf *= ratioSqrt;
// 		nbCallPerTest *= ratioSqrt;
		nbCallPerTest *= ratioTime;
		//Let's try again
		micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
		fullEllapsedTime = ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest);
		//We loop until we have one second of full time measurement
	}
	std::cout << "micro_benchmarkAutoNs : nbCallPerTest = " << nbCallPerTest << std::endl;
	if(nbCallPerTest > 1000000000lu || nbCallPerTest == 0lu){
		std::cout << "micro_benchmarkAutoNs : Warning : an invalid number of calls per test may lead to irrelevant results!" << std::endl;
	}
}
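
///Worked example of the tuning loop above, with illustrative numbers : with the
///defaults nbTestPerf = 100 and nbCallPerTest = 10, a function measured at
///50 ns per call gives fullEllapsedTime = 50*100*10 = 50 000 ns, far below
///PHOENIX_MINIMUM_TIME_NS (1e9 ns). So ratioTime = (1.3*1e9)/50 000 = 26 000
///and nbCallPerTest becomes 10*26 000 = 260 000; the next measurement then
///covers about 50*100*260 000 = 1.3e9 ns of work and the loop stops.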

///Do the micro benchmarking of a given function and give the performance results in ns
/** @param[out] ellapsedTimeNs : elapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * @param[out] timePerElement : time per element in ns
 * @param[out] timeErrorPerElement : error on the time per element in ns
 * @param nbTestPerf : number of performance tests
 * @param nbCallPerTest : number of calls per performance test
 * @param nbElement : number of elements treated by the function __f
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement,
		_Callable&& __f, _Args&&... __args)
{
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}
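
///Example of the per-element conversion above, with illustrative numbers : if
///one call treats nbElement = 1000 elements in ellapsedTimeNs = 2000 ns, then
///timePerElement = 2000/1000 = 2 ns/el, and timeErrorPerElement is scaled the
///same way.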

///Do the micro benchmarking of a given function and give the performance results in ns
/** @param[out] ellapsedTimeNs : elapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * @param[out] timePerElement : time per element in ns
 * @param[out] timeErrorPerElement : error on the time per element in ns
 * @param nbElement : number of elements treated by the function __f
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbElement,
		_Callable&& __f, _Args&&... __args)
{
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, __f, __args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}

///Do the micro benchmarking of a given function, give the performance results in ns and print them
/** @param testName : name of the performance test
 * @param nbTestPerf : number of performance tests
 * @param nbCallPerTest : number of calls per performance test
 * @param nbElement : number of elements treated by the function __f
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNsPrint(const std::string & testName, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement, _Callable&& __f, _Args&&... __args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbTestPerf, nbCallPerTest, nbElement, __f, __args...);

	std::cout << testName << " : nbElement = " << nbElement << ", timePerElement = " << timePerElement << " ns/el ± " << timeErrorPerElement << ", elapsedTime = " << ellapsedTimeNs << " ns ± " << ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}

///Do the micro benchmarking of a given function, give the performance results in ns and print them
/** @param testName : name of the performance test
 * @param nbElement : number of elements treated by the function __f
 * @param __f : function to be called and benchmarked
 * @param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNsPrint(const std::string & testName, size_t nbElement, _Callable&& __f, _Args&&... __args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbElement, __f, __args...);

	std::cout << testName << " : nbElement = " << nbElement << ", timePerElement = " << timePerElement << " ns/el ± " << timeErrorPerElement << ", elapsedTime = " << ellapsedTimeNs << " ns ± " << ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}
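
///Usage sketch for micro_benchmarkAutoNsPrint : the table and the lambda are
///hypothetical. The human-readable line goes to std::cout while the
///tab-separated record (nbElement, timePerElement, ellapsedTimeNs and their
///errors) goes to std::cerr, so redirecting stderr (./bench 2> perf.tsv)
///keeps the raw data for plotting.
/** @code
 * std::vector<float> table(4096lu, 1.0f);
 * micro_benchmarkAutoNsPrint("sumTable", table.size(), [&table](){
 * 	float s(0.0f);
 * 	for(float x : table){ s += x; }
 * 	phoenix_doNotOptimize(s);
 * });
 * @endcode
*/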

#endif