xref: /aosp_15_r20/external/XNNPACK/bench/rounding.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cfloat>
8 #include <cmath>
9 #include <functional>
10 #include <random>
11 #include <vector>
12 
13 #include <cpuinfo.h>
14 
15 #include <benchmark/benchmark.h>
16 #include "bench/utils.h"
17 
18 #include <xnnpack/aligned-allocator.h>
19 #include <xnnpack/common.h>
20 #include <xnnpack/math-stubs.h>
21 
22 
23 class Rounding : public benchmark::Fixture {
24  public:
Rounding()25   inline Rounding()
26   {
27     cpuinfo_initialize();
28     const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
29     const size_t l1d_reserve = 1024;
30     n_ = (l1d_size - l1d_reserve) / (2 * sizeof(float));
31     n_ = n_ / 16 * 16;
32   }
33 
SetUp(const benchmark::State &)34   virtual void SetUp(const benchmark::State&) override
35   {
36     std::random_device random_device;
37     auto rng = std::mt19937(random_device());
38     auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
39 
40     input_.resize(n());
41     std::generate(input_.begin(), input_.end(), std::ref(f32rng));
42     output_.resize(n());
43     std::fill(output_.begin(), output_.end(), 0xA5);
44   }
45 
TearDown(benchmark::State & state)46   virtual void TearDown(benchmark::State& state) override
47   {
48     state.SetItemsProcessed(uint64_t(state.iterations()) * n());
49     state.SetBytesProcessed(uint64_t(state.iterations()) * n() * 2 * sizeof(float));
50     input_.clear();
51     output_.clear();
52   }
53 
input() const54   inline const float* input() const
55   {
56     return input_.data();
57   }
58 
output()59   inline float* output()
60   {
61     return output_.data();
62   }
63 
n() const64   inline size_t n() const
65   {
66     return n_;
67   }
68 
69  protected:
70   std::vector<float, AlignedAllocator<float, 64>> input_;
71   std::vector<float, AlignedAllocator<float, 64>> output_;
72   size_t n_;
73 };
74 
75 class RoundingToNearestEven : public Rounding { };
76 class RoundingDown : public Rounding { };
77 class RoundingUp : public Rounding { };
78 class RoundingTowardsZero : public Rounding { };
79 
// Times xnn_math_f32_roundne__scalar_addsub on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, scalar_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_addsub(num_bytes, src, dst);
  }
}
86 
// Times xnn_math_f32_roundne__scalar_nearbyint on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, scalar_nearbyint)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_nearbyint(num_bytes, src, dst);
  }
}
93 
// Times xnn_math_f32_roundne__scalar_rint on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, scalar_rint)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_rint(num_bytes, src, dst);
  }
}
100 
// Times xnn_math_f32_roundd__scalar_addsub on the fixture's buffers.
BENCHMARK_F(RoundingDown, scalar_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_addsub(num_bytes, src, dst);
  }
}
107 
// Times xnn_math_f32_roundd__scalar_cvt on the fixture's buffers.
BENCHMARK_F(RoundingDown, scalar_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_cvt(num_bytes, src, dst);
  }
}
114 
// Times xnn_math_f32_roundd__scalar_floor on the fixture's buffers.
BENCHMARK_F(RoundingDown, scalar_floor)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_floor(num_bytes, src, dst);
  }
}
121 
// Times xnn_math_f32_roundu__scalar_addsub on the fixture's buffers.
BENCHMARK_F(RoundingUp, scalar_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_addsub(num_bytes, src, dst);
  }
}
128 
// Times xnn_math_f32_roundu__scalar_cvt on the fixture's buffers.
BENCHMARK_F(RoundingUp, scalar_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_cvt(num_bytes, src, dst);
  }
}
135 
// Times xnn_math_f32_roundu__scalar_ceil on the fixture's buffers.
BENCHMARK_F(RoundingUp, scalar_ceil)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_ceil(num_bytes, src, dst);
  }
}
142 
// Times xnn_math_f32_roundz__scalar_addsub on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, scalar_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_addsub(num_bytes, src, dst);
  }
}
149 
// Times xnn_math_f32_roundz__scalar_cvt on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, scalar_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_cvt(num_bytes, src, dst);
  }
}
156 
// Times xnn_math_f32_roundz__scalar_trunc on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, scalar_trunc)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_trunc(num_bytes, src, dst);
  }
}
163 
164 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Times xnn_math_f32_roundne__wasmsimd_addsub on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, wasmsimd_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_addsub(num_bytes, src, dst);
  }
}
171 
// Times xnn_math_f32_roundne__wasmsimd_native on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, wasmsimd_native)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_native(num_bytes, src, dst);
  }
}
178 
// Times xnn_math_f32_roundd__wasmsimd_addsub on the fixture's buffers.
BENCHMARK_F(RoundingDown, wasmsimd_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_addsub(num_bytes, src, dst);
  }
}
185 
// Times xnn_math_f32_roundd__wasmsimd_cvt on the fixture's buffers.
BENCHMARK_F(RoundingDown, wasmsimd_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_cvt(num_bytes, src, dst);
  }
}
192 
// Times xnn_math_f32_roundd__wasmsimd_native on the fixture's buffers.
BENCHMARK_F(RoundingDown, wasmsimd_native)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_native(num_bytes, src, dst);
  }
}
199 
// Times xnn_math_f32_roundu__wasmsimd_addsub on the fixture's buffers.
BENCHMARK_F(RoundingUp, wasmsimd_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_addsub(num_bytes, src, dst);
  }
}
206 
// Times xnn_math_f32_roundu__wasmsimd_cvt on the fixture's buffers.
BENCHMARK_F(RoundingUp, wasmsimd_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_cvt(num_bytes, src, dst);
  }
}
213 
// Times xnn_math_f32_roundu__wasmsimd_native on the fixture's buffers.
BENCHMARK_F(RoundingUp, wasmsimd_native)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_native(num_bytes, src, dst);
  }
}
220 
// Times xnn_math_f32_roundz__wasmsimd_addsub on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_addsub(num_bytes, src, dst);
  }
}
227 
// Times xnn_math_f32_roundz__wasmsimd_cvt on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_cvt(num_bytes, src, dst);
  }
}
234 
// Times xnn_math_f32_roundz__wasmsimd_native on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, wasmsimd_native)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_native(num_bytes, src, dst);
  }
}
241 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
242 
243 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Times xnn_math_f32_roundne__neon_addsub on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, neon_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__neon_addsub(num_bytes, src, dst);
  }
}
250 
// Times xnn_math_f32_roundne__neonv8 on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, neonv8)(benchmark::State& state) {
  // The kernel name indicates NEONv8 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__neonv8(num_bytes, src, dst);
  }
}
257 
// Times xnn_math_f32_roundd__neon_addsub on the fixture's buffers.
BENCHMARK_F(RoundingDown, neon_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_addsub(num_bytes, src, dst);
  }
}
264 
// Times xnn_math_f32_roundd__neon_cvt on the fixture's buffers.
BENCHMARK_F(RoundingDown, neon_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_cvt(num_bytes, src, dst);
  }
}
271 
// Times xnn_math_f32_roundd__neonv8 on the fixture's buffers.
BENCHMARK_F(RoundingDown, neonv8)(benchmark::State& state) {
  // The kernel name indicates NEONv8 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__neonv8(num_bytes, src, dst);
  }
}
278 
// Times xnn_math_f32_roundu__neon_addsub on the fixture's buffers.
BENCHMARK_F(RoundingUp, neon_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_addsub(num_bytes, src, dst);
  }
}
285 
// Times xnn_math_f32_roundu__neon_cvt on the fixture's buffers.
BENCHMARK_F(RoundingUp, neon_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_cvt(num_bytes, src, dst);
  }
}
292 
// Times xnn_math_f32_roundu__neonv8 on the fixture's buffers.
BENCHMARK_F(RoundingUp, neonv8)(benchmark::State& state) {
  // The kernel name indicates NEONv8 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__neonv8(num_bytes, src, dst);
  }
}
299 
// Times xnn_math_f32_roundz__neon_addsub on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, neon_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_addsub(num_bytes, src, dst);
  }
}
306 
// Times xnn_math_f32_roundz__neon_cvt on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, neon_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_cvt(num_bytes, src, dst);
  }
}
313 
// Times xnn_math_f32_roundz__neonv8 on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, neonv8)(benchmark::State& state) {
  // The kernel name indicates NEONv8 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__neonv8(num_bytes, src, dst);
  }
}
320 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
321 
322 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Times xnn_math_f32_roundne__sse_addsub on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, sse_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse_addsub(num_bytes, src, dst);
  }
}
329 
// Times xnn_math_f32_roundne__sse2_cvt on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, sse2_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse2_cvt(num_bytes, src, dst);
  }
}
336 
// Times xnn_math_f32_roundne__sse41 on the fixture's buffers.
BENCHMARK_F(RoundingToNearestEven, sse4)(benchmark::State& state) {
  // The kernel name indicates SSE4.1 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse41(num_bytes, src, dst);
  }
}
343 
// Times xnn_math_f32_roundd__sse_addsub on the fixture's buffers.
BENCHMARK_F(RoundingDown, sse_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse_addsub(num_bytes, src, dst);
  }
}
350 
// Times xnn_math_f32_roundd__sse2_cvt on the fixture's buffers.
BENCHMARK_F(RoundingDown, sse2_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse2_cvt(num_bytes, src, dst);
  }
}
357 
// Times xnn_math_f32_roundd__sse41 on the fixture's buffers.
BENCHMARK_F(RoundingDown, sse4)(benchmark::State& state) {
  // The kernel name indicates SSE4.1 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse41(num_bytes, src, dst);
  }
}
364 
// Times xnn_math_f32_roundu__sse_addsub on the fixture's buffers.
BENCHMARK_F(RoundingUp, sse_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse_addsub(num_bytes, src, dst);
  }
}
371 
// Times xnn_math_f32_roundu__sse2_cvt on the fixture's buffers.
BENCHMARK_F(RoundingUp, sse2_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse2_cvt(num_bytes, src, dst);
  }
}
378 
// Times xnn_math_f32_roundu__sse41 on the fixture's buffers.
BENCHMARK_F(RoundingUp, sse4)(benchmark::State& state) {
  // The kernel name indicates SSE4.1 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse41(num_bytes, src, dst);
  }
}
385 
// Times xnn_math_f32_roundz__sse_addsub on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, sse_addsub)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse_addsub(num_bytes, src, dst);
  }
}
392 
// Times xnn_math_f32_roundz__sse2_cvt on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, sse2_cvt)(benchmark::State& state) {
  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse2_cvt(num_bytes, src, dst);
  }
}
399 
// Times xnn_math_f32_roundz__sse41 on the fixture's buffers.
BENCHMARK_F(RoundingTowardsZero, sse4)(benchmark::State& state) {
  // The kernel name indicates SSE4.1 instructions; report a skip instead of
  // running it on a CPU that cpuinfo says lacks the extension.
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }

  // First argument is the buffer size in bytes (n() floats).
  const size_t num_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse41(num_bytes, src, dst);
  }
}
406 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
407 
408 
409 #ifndef XNNPACK_BENCHMARK_NO_MAIN
410 BENCHMARK_MAIN();
411 #endif
412