1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cfloat>
8 #include <cmath>
9 #include <functional>
10 #include <random>
11 #include <vector>
12
13 #include <cpuinfo.h>
14
15 #include <benchmark/benchmark.h>
16 #include "bench/utils.h"
17
18 #include <xnnpack/aligned-allocator.h>
19 #include <xnnpack/common.h>
20 #include <xnnpack/math-stubs.h>
21
22
23 class Rounding : public benchmark::Fixture {
24 public:
Rounding()25 inline Rounding()
26 {
27 cpuinfo_initialize();
28 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
29 const size_t l1d_reserve = 1024;
30 n_ = (l1d_size - l1d_reserve) / (2 * sizeof(float));
31 n_ = n_ / 16 * 16;
32 }
33
SetUp(const benchmark::State &)34 virtual void SetUp(const benchmark::State&) override
35 {
36 std::random_device random_device;
37 auto rng = std::mt19937(random_device());
38 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
39
40 input_.resize(n());
41 std::generate(input_.begin(), input_.end(), std::ref(f32rng));
42 output_.resize(n());
43 std::fill(output_.begin(), output_.end(), 0xA5);
44 }
45
TearDown(benchmark::State & state)46 virtual void TearDown(benchmark::State& state) override
47 {
48 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
49 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * 2 * sizeof(float));
50 input_.clear();
51 output_.clear();
52 }
53
input() const54 inline const float* input() const
55 {
56 return input_.data();
57 }
58
output()59 inline float* output()
60 {
61 return output_.data();
62 }
63
n() const64 inline size_t n() const
65 {
66 return n_;
67 }
68
69 protected:
70 std::vector<float, AlignedAllocator<float, 64>> input_;
71 std::vector<float, AlignedAllocator<float, 64>> output_;
72 size_t n_;
73 };
74
75 class RoundingToNearestEven : public Rounding { };
76 class RoundingDown : public Rounding { };
77 class RoundingUp : public Rounding { };
78 class RoundingTowardsZero : public Rounding { };
79
// On every timed iteration, calls xnn_math_f32_roundne__scalar_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, scalar_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_addsub(byte_count, src, dst);
  }
}
86
// On every timed iteration, calls xnn_math_f32_roundne__scalar_nearbyint with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, scalar_nearbyint)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_nearbyint(byte_count, src, dst);
  }
}
93
// On every timed iteration, calls xnn_math_f32_roundne__scalar_rint with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, scalar_rint)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_rint(byte_count, src, dst);
  }
}
100
// On every timed iteration, calls xnn_math_f32_roundd__scalar_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, scalar_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_addsub(byte_count, src, dst);
  }
}
107
// On every timed iteration, calls xnn_math_f32_roundd__scalar_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, scalar_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_cvt(byte_count, src, dst);
  }
}
114
// On every timed iteration, calls xnn_math_f32_roundd__scalar_floor with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, scalar_floor)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_floor(byte_count, src, dst);
  }
}
121
// On every timed iteration, calls xnn_math_f32_roundu__scalar_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, scalar_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_addsub(byte_count, src, dst);
  }
}
128
// On every timed iteration, calls xnn_math_f32_roundu__scalar_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, scalar_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_cvt(byte_count, src, dst);
  }
}
135
// On every timed iteration, calls xnn_math_f32_roundu__scalar_ceil with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, scalar_ceil)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_ceil(byte_count, src, dst);
  }
}
142
// On every timed iteration, calls xnn_math_f32_roundz__scalar_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, scalar_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_addsub(byte_count, src, dst);
  }
}
149
// On every timed iteration, calls xnn_math_f32_roundz__scalar_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, scalar_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_cvt(byte_count, src, dst);
  }
}
156
// On every timed iteration, calls xnn_math_f32_roundz__scalar_trunc with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, scalar_trunc)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_trunc(byte_count, src, dst);
  }
}
163
164 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// On every timed iteration, calls xnn_math_f32_roundne__wasmsimd_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, wasmsimd_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_addsub(byte_count, src, dst);
  }
}
171
// On every timed iteration, calls xnn_math_f32_roundne__wasmsimd_native with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, wasmsimd_native)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_native(byte_count, src, dst);
  }
}
178
// On every timed iteration, calls xnn_math_f32_roundd__wasmsimd_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, wasmsimd_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_addsub(byte_count, src, dst);
  }
}
185
// On every timed iteration, calls xnn_math_f32_roundd__wasmsimd_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, wasmsimd_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_cvt(byte_count, src, dst);
  }
}
192
// On every timed iteration, calls xnn_math_f32_roundd__wasmsimd_native with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, wasmsimd_native)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_native(byte_count, src, dst);
  }
}
199
// On every timed iteration, calls xnn_math_f32_roundu__wasmsimd_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, wasmsimd_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_addsub(byte_count, src, dst);
  }
}
206
// On every timed iteration, calls xnn_math_f32_roundu__wasmsimd_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, wasmsimd_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_cvt(byte_count, src, dst);
  }
}
213
// On every timed iteration, calls xnn_math_f32_roundu__wasmsimd_native with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, wasmsimd_native)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_native(byte_count, src, dst);
  }
}
220
// On every timed iteration, calls xnn_math_f32_roundz__wasmsimd_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_addsub(byte_count, src, dst);
  }
}
227
// On every timed iteration, calls xnn_math_f32_roundz__wasmsimd_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_cvt(byte_count, src, dst);
  }
}
234
// On every timed iteration, calls xnn_math_f32_roundz__wasmsimd_native with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_native)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_native(byte_count, src, dst);
  }
}
241 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
242
243 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// On every timed iteration, calls xnn_math_f32_roundne__neon_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, neon_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__neon_addsub(byte_count, src, dst);
  }
}
250
// On every timed iteration, calls xnn_math_f32_roundne__neonv8 with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, neonv8)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__neonv8(byte_count, src, dst);
  }
}
257
// On every timed iteration, calls xnn_math_f32_roundd__neon_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, neon_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_addsub(byte_count, src, dst);
  }
}
264
// On every timed iteration, calls xnn_math_f32_roundd__neon_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, neon_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_cvt(byte_count, src, dst);
  }
}
271
// On every timed iteration, calls xnn_math_f32_roundd__neonv8 with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, neonv8)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neonv8(byte_count, src, dst);
  }
}
278
// On every timed iteration, calls xnn_math_f32_roundu__neon_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, neon_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_addsub(byte_count, src, dst);
  }
}
285
// On every timed iteration, calls xnn_math_f32_roundu__neon_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, neon_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_cvt(byte_count, src, dst);
  }
}
292
// On every timed iteration, calls xnn_math_f32_roundu__neonv8 with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, neonv8)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neonv8(byte_count, src, dst);
  }
}
299
// On every timed iteration, calls xnn_math_f32_roundz__neon_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, neon_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_addsub(byte_count, src, dst);
  }
}
306
// On every timed iteration, calls xnn_math_f32_roundz__neon_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, neon_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_cvt(byte_count, src, dst);
  }
}
313
// On every timed iteration, calls xnn_math_f32_roundz__neonv8 with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, neonv8)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neonv8(byte_count, src, dst);
  }
}
320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
321
322 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// On every timed iteration, calls xnn_math_f32_roundne__sse_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, sse_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse_addsub(byte_count, src, dst);
  }
}
329
// On every timed iteration, calls xnn_math_f32_roundne__sse2_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingToNearestEven, sse2_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse2_cvt(byte_count, src, dst);
  }
}
336
// On every timed iteration, calls xnn_math_f32_roundne__sse41 with n() * sizeof(float) and
// the fixture's input and output pointers. Note: the benchmark is registered as "sse4" while
// the function it invokes carries the "sse41" suffix.
BENCHMARK_F(RoundingToNearestEven, sse4)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse41(byte_count, src, dst);
  }
}
343
// On every timed iteration, calls xnn_math_f32_roundd__sse_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, sse_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse_addsub(byte_count, src, dst);
  }
}
350
// On every timed iteration, calls xnn_math_f32_roundd__sse2_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingDown, sse2_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse2_cvt(byte_count, src, dst);
  }
}
357
// On every timed iteration, calls xnn_math_f32_roundd__sse41 with n() * sizeof(float) and
// the fixture's input and output pointers. Note: the benchmark is registered as "sse4" while
// the function it invokes carries the "sse41" suffix.
BENCHMARK_F(RoundingDown, sse4)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse41(byte_count, src, dst);
  }
}
364
// On every timed iteration, calls xnn_math_f32_roundu__sse_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, sse_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse_addsub(byte_count, src, dst);
  }
}
371
// On every timed iteration, calls xnn_math_f32_roundu__sse2_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingUp, sse2_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse2_cvt(byte_count, src, dst);
  }
}
378
// On every timed iteration, calls xnn_math_f32_roundu__sse41 with n() * sizeof(float) and
// the fixture's input and output pointers. Note: the benchmark is registered as "sse4" while
// the function it invokes carries the "sse41" suffix.
BENCHMARK_F(RoundingUp, sse4)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse41(byte_count, src, dst);
  }
}
385
// On every timed iteration, calls xnn_math_f32_roundz__sse_addsub with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, sse_addsub)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse_addsub(byte_count, src, dst);
  }
}
392
// On every timed iteration, calls xnn_math_f32_roundz__sse2_cvt with
// n() * sizeof(float) and the fixture's input and output pointers.
BENCHMARK_F(RoundingTowardsZero, sse2_cvt)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse2_cvt(byte_count, src, dst);
  }
}
399
// On every timed iteration, calls xnn_math_f32_roundz__sse41 with n() * sizeof(float) and
// the fixture's input and output pointers. Note: the benchmark is registered as "sse4" while
// the function it invokes carries the "sse41" suffix.
BENCHMARK_F(RoundingTowardsZero, sse4)(benchmark::State& state) {
  const size_t byte_count = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse41(byte_count, src, dst);
  }
}
406 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
407
408
409 #ifndef XNNPACK_BENCHMARK_NO_MAIN
410 BENCHMARK_MAIN();
411 #endif
412