/***************************************
	Author : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	License : CeCILL-C
****************************************/

#ifndef __MICRO_BENCHMARK_NS_IMPL_H__
#define __MICRO_BENCHMARK_NS_IMPL_H__

#include <cmath>
#include <iostream>
#include <string>

#include "micro_benchmark_ns.h"

///Minimum time for a performance test in ns
#define PHOENIX_MINIMUM_TIME_NS 1000000000.0

///Prevent the compiler from optimising a loop away
/**	@param value : reference to a value
*/
template <class T>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(T const& value) {
	//Empty asm that claims to read value : the compiler must materialise it
	asm volatile("" : : "r,m"(value) : "memory");
}

///Prevent the compiler from optimising a loop away
/**	@param value : reference to a value
*/
template <class T>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(T & value) {
#if defined(__clang__)
	asm volatile("" : "+r,m"(value) : : "memory");
#else
	//Same barrier with the constraint order swapped for non-clang compilers
	asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
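
//Usage sketch (illustrative only, not part of this header) : feeding an accumulator
//through phoenix_doNotOptimize makes the compiler assume the value is observed, so a
//benchmarked loop cannot be removed as dead code. The names sum, table and n below
//are hypothetical.
/*
	long sum = 0l;
	for(size_t i(0lu); i < n; ++i){
		sum += table[i];                //the work being timed
		::phoenix_doNotOptimize(sum);   //keep the loop body alive
	}
*/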

///Do the micro benchmarking of a given function and give the performance results in ns
/**	@param[out] ellapsedTimeNs : elapsed time in ns
 * 	@param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * 	@param nbTestPerf : number of performance tests
 * 	@param nbCallPerTest : number of calls per performance test
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, size_t nbTestPerf, size_t nbCallPerTest,
		_Callable&& __f, _Args&&... __args)
{
	VecEllapsedTime vecTimeNs;
	int res = 0;
	::phoenix_doNotOptimize(res);
	for(size_t i(0lu); i < nbTestPerf; ++i){
		//Starting the timer
		HiPeTime beginTime = phoenix_getTime();
		for(size_t j(0lu); j < nbCallPerTest; ++j){
			::phoenix_doNotOptimize(res);
			__f(__args...);
		}
		//Get the average time per call over the nbCallPerTest calls
		NanoSecs elapsedTime(phoenix_getTime() - beginTime);
		double fullNs(elapsedTime.count()/((double)nbCallPerTest));
		vecTimeNs.push_back(fullNs);
	}
	MapOrderedTime mapOrderTime;
	micro_benchmarkVecToMap(mapOrderTime, vecTimeNs);
	//Keep roughly 70% of the measurements for the statistics
	size_t nbValueUsed(vecTimeNs.size()*0.7 + 1lu);
	if(nbValueUsed > vecTimeNs.size()){
		nbValueUsed = vecTimeNs.size();
	}
	micro_benchmarkComputeTime(ellapsedTimeNs, ellapsedTimeErrorNs, mapOrderTime, nbValueUsed);
}
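
//Usage sketch under stated assumptions : saxpy, y, x, a and n are hypothetical names,
//only micro_benchmarkNs comes from this header. Each of the 100 tests times 1000 calls
//and the averaged per-call time, with its error, is returned in nanoseconds.
/*
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0);
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, 100lu, 1000lu, saxpy, y, x, a, n);
	std::cout << "saxpy : " << ellapsedTimeNs << " ns ± " << ellapsedTimeErrorNs << std::endl;
*/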

///Do the micro benchmarking of a given function and give the performance results in ns
/**	@param[out] ellapsedTimeNs : elapsed time in ns
 * 	@param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
 * 
 * 	This function tries to find a relevant performance measurement automatically
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, _Callable&& __f, _Args&&... __args){
	size_t nbTestPerf(100lu), nbCallPerTest(10lu);
	//Let's try with default values
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
	double fullEllapsedTime(ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest));
	//Try again as long as the total measured time is less than one second
	while(fullEllapsedTime < PHOENIX_MINIMUM_TIME_NS && nbCallPerTest < 1000000000lu && nbCallPerTest > 0lu){
		//Scale nbCallPerTest so that the total time reaches about 1.3 seconds
		double ratioTime((1.3*PHOENIX_MINIMUM_TIME_NS)/fullEllapsedTime);
		if(ratioTime < 1.2){
			ratioTime = 1.2;
		}
		nbCallPerTest *= ratioTime;
		//Let's try again
		micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
		fullEllapsedTime = ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest);
	}
	std::cout << "micro_benchmarkAutoNs : nbCallPerTest = " << nbCallPerTest << std::endl;
	if(nbCallPerTest > 1000000000lu || nbCallPerTest == 0lu){
		std::cout << "micro_benchmarkAutoNs : Warning : invalid number of calls per test, the results may be irrelevant!" << std::endl;
	}
}
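
//Worked example of the calibration above (illustrative numbers) : if the first pass
//measures ellapsedTimeNs = 50 ns with nbTestPerf = 100 and nbCallPerTest = 10, then
//fullEllapsedTime = 50 * 100 * 10 = 50 000 ns. The loop computes
//ratioTime = (1.3 * 1e9) / 50 000 = 26 000, so nbCallPerTest becomes 260 000 and the
//next pass runs for about 100 * 260 000 * 50 ns ≈ 1.3 s, which satisfies
//PHOENIX_MINIMUM_TIME_NS and ends the loop.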

///Do the micro benchmarking of a given function and give the performance results in ns
/**	@param[out] ellapsedTimeNs : elapsed time in ns
 * 	@param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * 	@param[out] timePerElement : time per element in ns
 * 	@param[out] timeErrorPerElement : error on the time per element in ns
 * 	@param nbTestPerf : number of performance tests
 * 	@param nbCallPerTest : number of calls per performance test
 * 	@param nbElement : number of elements treated by the function __f
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement,
		_Callable&& __f, _Args&&... __args)
{
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}

///Do the micro benchmarking of a given function and give the performance results in ns
/**	@param[out] ellapsedTimeNs : elapsed time in ns
 * 	@param[out] ellapsedTimeErrorNs : error on the elapsed time in ns
 * 	@param[out] timePerElement : time per element in ns
 * 	@param[out] timeErrorPerElement : error on the time per element in ns
 * 	@param nbElement : number of elements treated by the function __f
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbElement,
		_Callable&& __f, _Args&&... __args)
{
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, __f, __args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}
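
//Usage sketch under stated assumptions : fillArray and ptr are hypothetical, only
//micro_benchmarkAutoNs comes from this header. With nbElement = 1024, timePerElement
//approximates the cost of one element in ns/el.
/*
	double t(0.0), tErr(0.0), tEl(0.0), tElErr(0.0);
	micro_benchmarkAutoNs(t, tErr, tEl, tElErr, 1024lu, fillArray, ptr, 1024lu);
*/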

///Do the micro benchmarking of a given function, give the performance results in ns and print them
/**	@param testName : name of the performance test
 * 	@param nbTestPerf : number of performance tests
 * 	@param nbCallPerTest : number of calls per performance test
 * 	@param nbElement : number of elements treated by the function __f
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkNsPrint(const std::string & testName, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement, _Callable&& __f, _Args&&... __args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbTestPerf, nbCallPerTest, nbElement, __f, __args...);

	//Human readable summary on stdout, tab separated values on stderr (e.g. to redirect into a data file)
	std::cout << testName << " : nbElement = " << nbElement << ", timePerElement = " << timePerElement << " ns/el ± " << timeErrorPerElement << ", elapsedTime = " << ellapsedTimeNs << " ns ± " << ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}

///Do the micro benchmarking of a given function, give the performance results in ns and print them
/**	@param testName : name of the performance test
 * 	@param nbElement : number of elements treated by the function __f
 * 	@param __f : function to be called and benchmarked
 * 	@param __args : parameters of the function to be benchmarked
*/
template<typename _Callable, typename... _Args>
void micro_benchmarkAutoNsPrint(const std::string & testName, size_t nbElement, _Callable&& __f, _Args&&... __args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbElement, __f, __args...);

	//Human readable summary on stdout, tab separated values on stderr (e.g. to redirect into a data file)
	std::cout << testName << " : nbElement = " << nbElement << ", timePerElement = " << timePerElement << " ns/el ± " << timeErrorPerElement << ", elapsedTime = " << ellapsedTimeNs << " ns ± " << ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}
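
//End-to-end usage sketch (hypothetical program, not part of this header) : benchmark a
//simple reduction with automatic calibration. Only micro_benchmarkAutoNsPrint comes
//from this header; sumTable and main are assumptions for illustration.
/*
	#include <vector>
	#include "micro_benchmark_ns.h"

	double sumTable(const double* table, size_t n){
		double sum(0.0);
		for(size_t i(0lu); i < n; ++i){sum += table[i];}
		return sum;
	}

	int main(){
		const size_t nbElement(1000lu);
		std::vector<double> table(nbElement, 1.0);
		micro_benchmarkAutoNsPrint("sumTable", nbElement, sumTable, table.data(), nbElement);
		return 0;
	}
*/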

#endif