// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14
15 #include <xnnpack.h>
16 #include <xnnpack/aligned-allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/microfnptr.h>
19 #include <xnnpack/microparams-init.h>
20 #include <xnnpack/raddstoreexpminusmax.h>
21 #include <xnnpack/rmax.h>
22
23
f32_raddstoreexpminusmax(benchmark::State & state,xnn_f32_rmax_ukernel_function rmax,xnn_f32_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax,xnn_init_f32_expminus_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void f32_raddstoreexpminusmax(
25 benchmark::State& state,
26 xnn_f32_rmax_ukernel_function rmax,
27 xnn_f32_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax,
28 xnn_init_f32_expminus_params_fn init_params,
29 benchmark::utils::IsaCheckFunction isa_check = nullptr)
30 {
31 if (isa_check && !isa_check(state)) {
32 return;
33 }
34
35 const size_t elements = state.range(0);
36 const size_t cache_line_size_max = 128;
37 const size_t packed_elements = benchmark::utils::RoundUp(elements, cache_line_size_max / sizeof(float));
38
39 std::random_device random_device;
40 auto rng = std::mt19937(random_device());
41 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1000.0f, 1000.0f), std::ref(rng));
42
43 const size_t num_buffers = 1 +
44 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), packed_elements * sizeof(float));
45 std::vector<float, AlignedAllocator<float, 64>> x(elements);
46 std::vector<float, AlignedAllocator<float, 64>> y(packed_elements * num_buffers);
47
48 std::generate(x.begin(), x.end(), std::ref(f32rng));
49
50 benchmark::utils::DisableDenormals();
51
52 xnn_f32_expminus_params params;
53 init_params(¶ms);
54
55 size_t buffer_index = 0;
56 for (auto _ : state) {
57 state.PauseTiming();
58 float x_max = nanf("");
59 rmax(elements * sizeof(float), x.data(), &x_max);
60 if (++buffer_index == num_buffers) {
61 buffer_index = 0;
62 }
63 state.ResumeTiming();
64
65 float y_sum = nanf("");
66 raddstoreexpminusmax(elements * sizeof(float), x.data(), &x_max, y.data() + buffer_index * packed_elements, &y_sum, ¶ms);
67 }
68
69 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
70 if (cpu_frequency != 0) {
71 state.counters["cpufreq"] = cpu_frequency;
72 }
73
74 const size_t elements_per_iteration = elements;
75 state.counters["elements"] =
76 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
77
78 const size_t bytes_per_iteration = 2 * elements * sizeof(float);
79 state.counters["bytes"] =
80 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
81 }
82
83 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
84 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x4,
85 xnn_f32_rmax_ukernel__neon,
86 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x4,
87 xnn_init_f32_expminus_neon_rr2_p5_params,
88 benchmark::utils::CheckNEON)
89 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
90 ->UseRealTime();
91 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x8,
92 xnn_f32_rmax_ukernel__neon,
93 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8,
94 xnn_init_f32_expminus_neon_rr2_p5_params,
95 benchmark::utils::CheckNEON)
96 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
97 ->UseRealTime();
98 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x8_acc2,
99 xnn_f32_rmax_ukernel__neon,
100 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2,
101 xnn_init_f32_expminus_neon_rr2_p5_params,
102 benchmark::utils::CheckNEON)
103 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
104 ->UseRealTime();
105 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x12,
106 xnn_f32_rmax_ukernel__neon,
107 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12,
108 xnn_init_f32_expminus_neon_rr2_p5_params,
109 benchmark::utils::CheckNEON)
110 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
111 ->UseRealTime();
112 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x12_acc2,
113 xnn_f32_rmax_ukernel__neon,
114 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc2,
115 xnn_init_f32_expminus_neon_rr2_p5_params,
116 benchmark::utils::CheckNEON)
117 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
118 ->UseRealTime();
119 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x12_acc3,
120 xnn_f32_rmax_ukernel__neon,
121 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3,
122 xnn_init_f32_expminus_neon_rr2_p5_params,
123 benchmark::utils::CheckNEON)
124 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
125 ->UseRealTime();
126 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x16,
127 xnn_f32_rmax_ukernel__neon,
128 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x16,
129 xnn_init_f32_expminus_neon_rr2_p5_params,
130 benchmark::utils::CheckNEON)
131 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
132 ->UseRealTime();
133 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x16_acc2,
134 xnn_f32_rmax_ukernel__neon,
135 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x16_acc2,
136 xnn_init_f32_expminus_neon_rr2_p5_params,
137 benchmark::utils::CheckNEON)
138 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
139 ->UseRealTime();
140 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x16_acc4,
141 xnn_f32_rmax_ukernel__neon,
142 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x16_acc4,
143 xnn_init_f32_expminus_neon_rr2_p5_params,
144 benchmark::utils::CheckNEON)
145 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
146 ->UseRealTime();
147 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x20,
148 xnn_f32_rmax_ukernel__neon,
149 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20,
150 xnn_init_f32_expminus_neon_rr2_p5_params,
151 benchmark::utils::CheckNEON)
152 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
153 ->UseRealTime();
154 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x20_acc2,
155 xnn_f32_rmax_ukernel__neon,
156 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2,
157 xnn_init_f32_expminus_neon_rr2_p5_params,
158 benchmark::utils::CheckNEON)
159 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
160 ->UseRealTime();
161 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_p5_x20_acc5,
162 xnn_f32_rmax_ukernel__neon,
163 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5,
164 xnn_init_f32_expminus_neon_rr2_p5_params,
165 benchmark::utils::CheckNEON)
166 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
167 ->UseRealTime();
168
169 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x4,
170 xnn_f32_rmax_ukernel__neon,
171 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4,
172 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
173 benchmark::utils::CheckNEON)
174 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
175 ->UseRealTime();
176 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x8,
177 xnn_f32_rmax_ukernel__neon,
178 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8,
179 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
180 benchmark::utils::CheckNEON)
181 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
182 ->UseRealTime();
183 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x8_acc2,
184 xnn_f32_rmax_ukernel__neon,
185 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2,
186 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
187 benchmark::utils::CheckNEON)
188 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
189 ->UseRealTime();
190 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x12,
191 xnn_f32_rmax_ukernel__neon,
192 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12,
193 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
194 benchmark::utils::CheckNEON)
195 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
196 ->UseRealTime();
197 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x12_acc2,
198 xnn_f32_rmax_ukernel__neon,
199 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12_acc2,
200 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
201 benchmark::utils::CheckNEON)
202 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
203 ->UseRealTime();
204 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x12_acc3,
205 xnn_f32_rmax_ukernel__neon,
206 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12_acc3,
207 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
208 benchmark::utils::CheckNEON)
209 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
210 ->UseRealTime();
211 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x16,
212 xnn_f32_rmax_ukernel__neon,
213 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x16,
214 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
215 benchmark::utils::CheckNEON)
216 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
217 ->UseRealTime();
218 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x16_acc2,
219 xnn_f32_rmax_ukernel__neon,
220 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x16_acc2,
221 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
222 benchmark::utils::CheckNEON)
223 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
224 ->UseRealTime();
225 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x16_acc4,
226 xnn_f32_rmax_ukernel__neon,
227 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x16_acc4,
228 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
229 benchmark::utils::CheckNEON)
230 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
231 ->UseRealTime();
232 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x20,
233 xnn_f32_rmax_ukernel__neon,
234 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20,
235 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
236 benchmark::utils::CheckNEON)
237 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
238 ->UseRealTime();
239 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x20_acc2,
240 xnn_f32_rmax_ukernel__neon,
241 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2,
242 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
243 benchmark::utils::CheckNEON)
244 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
245 ->UseRealTime();
246 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neon_rr2_lut64_p2_x20_acc5,
247 xnn_f32_rmax_ukernel__neon,
248 xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5,
249 xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
250 benchmark::utils::CheckNEON)
251 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
252 ->UseRealTime();
253
254 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x4,
255 xnn_f32_rmax_ukernel__neon,
256 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x4,
257 xnn_init_f32_expminus_neonfma_rr1_p5_params,
258 benchmark::utils::CheckNEONFMA)
259 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
260 ->UseRealTime();
261 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x8,
262 xnn_f32_rmax_ukernel__neon,
263 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x8,
264 xnn_init_f32_expminus_neonfma_rr1_p5_params,
265 benchmark::utils::CheckNEONFMA)
266 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
267 ->UseRealTime();
268 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x8_acc2,
269 xnn_f32_rmax_ukernel__neon,
270 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x8_acc2,
271 xnn_init_f32_expminus_neonfma_rr1_p5_params,
272 benchmark::utils::CheckNEONFMA)
273 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
274 ->UseRealTime();
275 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x12,
276 xnn_f32_rmax_ukernel__neon,
277 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x12,
278 xnn_init_f32_expminus_neonfma_rr1_p5_params,
279 benchmark::utils::CheckNEONFMA)
280 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
281 ->UseRealTime();
282 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x12_acc2,
283 xnn_f32_rmax_ukernel__neon,
284 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x12_acc2,
285 xnn_init_f32_expminus_neonfma_rr1_p5_params,
286 benchmark::utils::CheckNEONFMA)
287 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
288 ->UseRealTime();
289 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x12_acc3,
290 xnn_f32_rmax_ukernel__neon,
291 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x12_acc3,
292 xnn_init_f32_expminus_neonfma_rr1_p5_params,
293 benchmark::utils::CheckNEONFMA)
294 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
295 ->UseRealTime();
296 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x16,
297 xnn_f32_rmax_ukernel__neon,
298 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x16,
299 xnn_init_f32_expminus_neonfma_rr1_p5_params,
300 benchmark::utils::CheckNEONFMA)
301 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
302 ->UseRealTime();
303 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x16_acc2,
304 xnn_f32_rmax_ukernel__neon,
305 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x16_acc2,
306 xnn_init_f32_expminus_neonfma_rr1_p5_params,
307 benchmark::utils::CheckNEONFMA)
308 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
309 ->UseRealTime();
310 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x16_acc4,
311 xnn_f32_rmax_ukernel__neon,
312 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x16_acc4,
313 xnn_init_f32_expminus_neonfma_rr1_p5_params,
314 benchmark::utils::CheckNEONFMA)
315 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
316 ->UseRealTime();
317 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x20,
318 xnn_f32_rmax_ukernel__neon,
319 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20,
320 xnn_init_f32_expminus_neonfma_rr1_p5_params,
321 benchmark::utils::CheckNEONFMA)
322 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
323 ->UseRealTime();
324 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x20_acc2,
325 xnn_f32_rmax_ukernel__neon,
326 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2,
327 xnn_init_f32_expminus_neonfma_rr1_p5_params,
328 benchmark::utils::CheckNEONFMA)
329 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
330 ->UseRealTime();
331 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_p5_x20_acc5,
332 xnn_f32_rmax_ukernel__neon,
333 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5,
334 xnn_init_f32_expminus_neonfma_rr1_p5_params,
335 benchmark::utils::CheckNEONFMA)
336 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
337 ->UseRealTime();
338
339 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x4,
340 xnn_f32_rmax_ukernel__neon,
341 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4,
342 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
343 benchmark::utils::CheckNEONFMA)
344 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
345 ->UseRealTime();
346 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x8,
347 xnn_f32_rmax_ukernel__neon,
348 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8,
349 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
350 benchmark::utils::CheckNEONFMA)
351 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
352 ->UseRealTime();
353 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x8_acc2,
354 xnn_f32_rmax_ukernel__neon,
355 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2,
356 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
357 benchmark::utils::CheckNEONFMA)
358 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
359 ->UseRealTime();
360 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x12,
361 xnn_f32_rmax_ukernel__neon,
362 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12,
363 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
364 benchmark::utils::CheckNEONFMA)
365 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
366 ->UseRealTime();
367 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x12_acc2,
368 xnn_f32_rmax_ukernel__neon,
369 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc2,
370 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
371 benchmark::utils::CheckNEONFMA)
372 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
373 ->UseRealTime();
374 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x12_acc3,
375 xnn_f32_rmax_ukernel__neon,
376 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc3,
377 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
378 benchmark::utils::CheckNEONFMA)
379 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
380 ->UseRealTime();
381 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x16,
382 xnn_f32_rmax_ukernel__neon,
383 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x16,
384 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
385 benchmark::utils::CheckNEONFMA)
386 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
387 ->UseRealTime();
388 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x16_acc2,
389 xnn_f32_rmax_ukernel__neon,
390 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x16_acc2,
391 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
392 benchmark::utils::CheckNEONFMA)
393 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
394 ->UseRealTime();
395 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x16_acc4,
396 xnn_f32_rmax_ukernel__neon,
397 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x16_acc4,
398 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
399 benchmark::utils::CheckNEONFMA)
400 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
401 ->UseRealTime();
402 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x20,
403 xnn_f32_rmax_ukernel__neon,
404 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x20,
405 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
406 benchmark::utils::CheckNEONFMA)
407 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
408 ->UseRealTime();
409 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x20_acc2,
410 xnn_f32_rmax_ukernel__neon,
411 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x20_acc2,
412 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
413 benchmark::utils::CheckNEONFMA)
414 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
415 ->UseRealTime();
416 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, neonfma_rr1_lut64_p2_x20_acc5,
417 xnn_f32_rmax_ukernel__neon,
418 xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x20_acc5,
419 xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
420 benchmark::utils::CheckNEONFMA)
421 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
422 ->UseRealTime();
423 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
424
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // avx512f_rr1_p5_scalef kernels: xnn_f32_rmax_ukernel__avx supplies the max, gated by CheckAVX512F.
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x128,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x128_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x128_acc4,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x144,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x144_acc3,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x160,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x160_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x160_acc5,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x192,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x192_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x192_acc3,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx512f_rr1_p5_scalef_x192_acc6,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6,
                    xnn_init_f32_expminus_avx512_rr1_p5_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  // avx2_rr1_p5 kernels: same AVX rmax kernel, gated by CheckAVX2.
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x64,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x64,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x64_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x64_acc2,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x64_acc4,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x64_acc4,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x72,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x72,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x72_acc3,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x72_acc3,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x80,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x80,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x80_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x80_acc2,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x80_acc5,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x80_acc5,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x96,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x96_acc2,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x96_acc3,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, avx2_rr1_p5_x96_acc6,
                    xnn_f32_rmax_ukernel__avx,
                    xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6,
                    xnn_init_f32_expminus_avx2_rr1_p5_params,
                    benchmark::utils::CheckAVX2)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  // sse2_rr2_p5 kernels: use xnn_f32_rmax_ukernel__sse and pass no ISA check
  // (isa_check keeps its nullptr default).
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x4,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x4,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x8,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x8_acc2,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x12,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x12,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x12_acc2,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x12_acc2,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x12_acc3,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x12_acc3,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x16,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x16,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x16_acc2,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x16_acc2,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x16_acc4,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x16_acc4,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x20,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x20_acc2,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, sse2_rr2_p5_x20_acc5,
                    xnn_f32_rmax_ukernel__sse,
                    xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5,
                    xnn_init_f32_expminus_sse2_rr2_p5_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
675
// WebAssembly SIMD rr2_p5 cases of the f32_raddstoreexpminusmax benchmark,
// compiled only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is non-zero.
// Every case pairs the same rmax microkernel (xnn_f32_rmax_ukernel__wasmsimd_arm)
// and the same params initializer (xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
// with a different raddstoreexpminusmax microkernel variant. No isa_check is
// passed, so the benchmark function's default (nullptr) applies and its ISA
// guard is skipped.
// UseRealTime() asks Google Benchmark to measure wall-clock time rather than
// CPU time.
// NOTE(review): the xN / accM suffixes presumably mean elements processed per
// loop iteration and number of partial-sum accumulators -- confirm against the
// kernel sources.
676 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
677 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x4,
678 xnn_f32_rmax_ukernel__wasmsimd_arm,
679 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x4,
680 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
681 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
682 ->UseRealTime();
683 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x8,
684 xnn_f32_rmax_ukernel__wasmsimd_arm,
685 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8,
686 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
687 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
688 ->UseRealTime();
689 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x8_acc2,
690 xnn_f32_rmax_ukernel__wasmsimd_arm,
691 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2,
692 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
693 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
694 ->UseRealTime();
695 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x12,
696 xnn_f32_rmax_ukernel__wasmsimd_arm,
697 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12,
698 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
699 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
700 ->UseRealTime();
701 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x12_acc2,
702 xnn_f32_rmax_ukernel__wasmsimd_arm,
703 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12_acc2,
704 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
705 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
706 ->UseRealTime();
707 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x12_acc3,
708 xnn_f32_rmax_ukernel__wasmsimd_arm,
709 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12_acc3,
710 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
711 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
712 ->UseRealTime();
713 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x16,
714 xnn_f32_rmax_ukernel__wasmsimd_arm,
715 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x16,
716 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
717 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
718 ->UseRealTime();
719 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x16_acc2,
720 xnn_f32_rmax_ukernel__wasmsimd_arm,
721 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x16_acc2,
722 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
723 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
724 ->UseRealTime();
725 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x16_acc4,
726 xnn_f32_rmax_ukernel__wasmsimd_arm,
727 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x16_acc4,
728 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
729 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
730 ->UseRealTime();
731 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x20,
732 xnn_f32_rmax_ukernel__wasmsimd_arm,
733 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20,
734 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
735 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
736 ->UseRealTime();
737 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x20_acc2,
738 xnn_f32_rmax_ukernel__wasmsimd_arm,
739 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2,
740 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
741 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
742 ->UseRealTime();
743 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, wasmsimd_rr2_p5_x20_acc5,
744 xnn_f32_rmax_ukernel__wasmsimd_arm,
745 xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5,
746 xnn_init_f32_expminus_wasmsimd_rr2_p5_params)
747 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
748 ->UseRealTime();
749 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
750
// Scalar rr2_lut64_p2 cases of the f32_raddstoreexpminusmax benchmark.
// These registrations are not wrapped in any architecture #if in this part of
// the file. Each case pairs xnn_f32_rmax_ukernel__scalar and
// xnn_init_f32_expminus_scalar_rr2_lut64_p2_params with one
// raddstoreexpminusmax microkernel variant; no isa_check is passed, so the
// benchmark function's default (nullptr) applies and its ISA guard is skipped.
// UseRealTime() asks Google Benchmark to measure wall-clock time rather than
// CPU time.
// NOTE(review): "lut64_p2" presumably denotes a 64-entry lookup table combined
// with a degree-2 polynomial, and xN / accM the unroll factor and accumulator
// count -- confirm against the kernel sources.
751 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x1,
752 xnn_f32_rmax_ukernel__scalar,
753 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x1,
754 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
755 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
756 ->UseRealTime();
757 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x2,
758 xnn_f32_rmax_ukernel__scalar,
759 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x2,
760 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
761 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
762 ->UseRealTime();
763 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x2_acc2,
764 xnn_f32_rmax_ukernel__scalar,
765 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x2_acc2,
766 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
767 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
768 ->UseRealTime();
769 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x4,
770 xnn_f32_rmax_ukernel__scalar,
771 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4,
772 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
773 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
774 ->UseRealTime();
775 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x4_acc2,
776 xnn_f32_rmax_ukernel__scalar,
777 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc2,
778 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
779 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
780 ->UseRealTime();
781 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_lut64_p2_x4_acc4,
782 xnn_f32_rmax_ukernel__scalar,
783 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc4,
784 xnn_init_f32_expminus_scalar_rr2_lut64_p2_params)
785 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
786 ->UseRealTime();
787
// Scalar rr2_p5 cases of the f32_raddstoreexpminusmax benchmark.
// These registrations are not wrapped in any architecture #if in this part of
// the file. Each case pairs xnn_f32_rmax_ukernel__scalar and
// xnn_init_f32_expminus_scalar_rr2_p5_params with one raddstoreexpminusmax
// microkernel variant; no isa_check is passed, so the benchmark function's
// default (nullptr) applies and its ISA guard is skipped.
// UseRealTime() asks Google Benchmark to measure wall-clock time rather than
// CPU time.
// NOTE(review): "p5" presumably denotes a degree-5 polynomial approximation,
// and xN / accM the unroll factor and accumulator count -- confirm against the
// kernel sources.
788 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x1,
789 xnn_f32_rmax_ukernel__scalar,
790 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x1,
791 xnn_init_f32_expminus_scalar_rr2_p5_params)
792 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
793 ->UseRealTime();
794 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x2,
795 xnn_f32_rmax_ukernel__scalar,
796 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x2,
797 xnn_init_f32_expminus_scalar_rr2_p5_params)
798 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
799 ->UseRealTime();
800 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x2_acc2,
801 xnn_f32_rmax_ukernel__scalar,
802 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x2_acc2,
803 xnn_init_f32_expminus_scalar_rr2_p5_params)
804 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
805 ->UseRealTime();
806 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x4,
807 xnn_f32_rmax_ukernel__scalar,
808 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4,
809 xnn_init_f32_expminus_scalar_rr2_p5_params)
810 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
811 ->UseRealTime();
812 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x4_acc2,
813 xnn_f32_rmax_ukernel__scalar,
814 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4_acc2,
815 xnn_init_f32_expminus_scalar_rr2_p5_params)
816 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
817 ->UseRealTime();
818 BENCHMARK_CAPTURE(f32_raddstoreexpminusmax, scalar_rr2_p5_x4_acc4,
819 xnn_f32_rmax_ukernel__scalar,
820 xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4_acc4,
821 xnn_init_f32_expminus_scalar_rr2_p5_params)
822 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
823 ->UseRealTime();
824
// Provide the Google Benchmark entry point (BENCHMARK_MAIN expands to main())
// unless the build defines XNNPACK_BENCHMARK_NO_MAIN.
// NOTE(review): presumably the macro is defined when this file is linked into
// a binary that supplies its own main() -- confirm against the build files.
825 #ifndef XNNPACK_BENCHMARK_NO_MAIN
826 BENCHMARK_MAIN();
827 #endif
828