xref: /aosp_15_r20/external/XNNPACK/bench/f32-vlrelu.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14 
15 #include <xnnpack.h>
16 #include <xnnpack/aligned-allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/microfnptr.h>
19 #include <xnnpack/microparams-init.h>
20 #include <xnnpack/vunary.h>
21 
22 
f32_vlrelu(benchmark::State & state,xnn_f32_vlrelu_ukernel_function vlrelu,xnn_init_f32_lrelu_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)23 static void f32_vlrelu(
24   benchmark::State& state,
25   xnn_f32_vlrelu_ukernel_function vlrelu,
26   xnn_init_f32_lrelu_params_fn init_params,
27   benchmark::utils::IsaCheckFunction isa_check = nullptr)
28 {
29   if (isa_check && !isa_check(state)) {
30     return;
31   }
32 
33   const size_t elements = state.range(0);
34   std::vector<float, AlignedAllocator<float, 64>> input(elements);
35   std::vector<float, AlignedAllocator<float, 64>> output(elements);
36 
37   std::random_device random_device;
38   auto rng = std::mt19937(random_device());
39   auto f32rng = std::bind(std::uniform_real_distribution<float>(-5.0f, 5.0f), std::ref(rng));
40   std::generate(input.begin(), input.end(), std::ref(f32rng));
41   std::fill(output.begin(), output.end(), std::nanf(""));
42 
43   union xnn_f32_lrelu_params params;
44   init_params(&params, 0.01f);
45   for (auto _ : state) {
46     vlrelu(elements * sizeof(float), input.data(), output.data(), &params);
47   }
48 
49   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
50   if (cpu_frequency != 0) {
51     state.counters["cpufreq"] = cpu_frequency;
52   }
53 
54   const size_t elements_per_iteration = elements;
55   state.counters["elements"] =
56     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
57 
58   const size_t bytes_per_iteration = 2 * elements * sizeof(float);
59   state.counters["bytes"] =
60     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
61 }
62 
// NEON microkernels. The guard must cover 32-bit ARM as well as ARM64; the
// CheckNEON predicate lets f32_vlrelu bail out at run time when the check fails.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  BENCHMARK_CAPTURE(f32_vlrelu, neon_x4,
                    xnn_f32_vlrelu_ukernel__neon_x4,
                    xnn_init_f32_lrelu_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vlrelu, neon_x8,
                    xnn_f32_vlrelu_ukernel__neon_x8,
                    xnn_init_f32_lrelu_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
77 
// x86 / x86-64 microkernels, grouped by instruction-set extension.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // SSE: registered without an ISA predicate.
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse_x4,
      xnn_f32_vlrelu_ukernel__sse_x4, xnn_init_f32_lrelu_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse_x8,
      xnn_f32_vlrelu_ukernel__sse_x8, xnn_init_f32_lrelu_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // SSE2: registered without an ISA predicate.
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse2_x4,
      xnn_f32_vlrelu_ukernel__sse2_x4, xnn_init_f32_lrelu_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse2_x8,
      xnn_f32_vlrelu_ukernel__sse2_x8, xnn_init_f32_lrelu_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // SSE4.1: gated on CheckSSE41.
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse41_x4,
      xnn_f32_vlrelu_ukernel__sse41_x4, xnn_init_f32_lrelu_sse_params,
      benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, sse41_x8,
      xnn_f32_vlrelu_ukernel__sse41_x8, xnn_init_f32_lrelu_sse_params,
      benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // AVX: gated on CheckAVX.
  BENCHMARK_CAPTURE(
      f32_vlrelu, avx_x8,
      xnn_f32_vlrelu_ukernel__avx_x8, xnn_init_f32_lrelu_avx_params,
      benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, avx_x16,
      xnn_f32_vlrelu_ukernel__avx_x16, xnn_init_f32_lrelu_avx_params,
      benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // AVX512F: gated on CheckAVX512F; these use the scalar params initializer.
  BENCHMARK_CAPTURE(
      f32_vlrelu, avx512f_x16,
      xnn_f32_vlrelu_ukernel__avx512f_x16, xnn_init_f32_lrelu_scalar_params,
      benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, avx512f_x32,
      xnn_f32_vlrelu_ukernel__avx512f_x32, xnn_init_f32_lrelu_scalar_params,
      benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
140 
// WebAssembly SIMD microkernels: bitselect and minmax variants, no ISA predicate.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Bitselect variants.
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasmsimd_bitselect_x4,
      xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4, xnn_init_f32_lrelu_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasmsimd_bitselect_x8,
      xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8, xnn_init_f32_lrelu_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // Minmax variants.
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasmsimd_minmax_x4,
      xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4, xnn_init_f32_lrelu_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasmsimd_minmax_x8,
      xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8, xnn_init_f32_lrelu_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
164 
// Non-SIMD WebAssembly microkernels; built for every WAsm flavour listed in the guard.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasm_x1,
      xnn_f32_vlrelu_ukernel__wasm_x1, xnn_init_f32_lrelu_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasm_x2,
      xnn_f32_vlrelu_ukernel__wasm_x2, xnn_init_f32_lrelu_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vlrelu, wasm_x4,
      xnn_f32_vlrelu_ukernel__wasm_x4, xnn_init_f32_lrelu_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
182 
183 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x1,
184                   xnn_f32_vlrelu_ukernel__scalar_x1,
185                   xnn_init_f32_lrelu_scalar_params)
186   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
187   ->UseRealTime();
188 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x2,
189                   xnn_f32_vlrelu_ukernel__scalar_x2,
190                   xnn_init_f32_lrelu_scalar_params)
191   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
192   ->UseRealTime();
193 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x4,
194                   xnn_f32_vlrelu_ukernel__scalar_x4,
195                   xnn_init_f32_lrelu_scalar_params)
196   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
197   ->UseRealTime();
198 
199 #ifndef XNNPACK_BENCHMARK_NO_MAIN
200 BENCHMARK_MAIN();
201 #endif
202