1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14
15 #include <xnnpack.h>
16 #include <xnnpack/aligned-allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/microfnptr.h>
19 #include <xnnpack/microparams-init.h>
20 #include <xnnpack/vunary.h>
21
22
f32_velu(benchmark::State & state,xnn_f32_velu_ukernel_function elu,xnn_init_f32_elu_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)23 static void f32_velu(
24 benchmark::State& state,
25 xnn_f32_velu_ukernel_function elu,
26 xnn_init_f32_elu_params_fn init_params,
27 benchmark::utils::IsaCheckFunction isa_check = nullptr)
28 {
29 if (isa_check && !isa_check(state)) {
30 return;
31 }
32
33 const size_t num_elements = state.range(0);
34
35 std::random_device random_device;
36 auto rng = std::mt19937(random_device());
37 auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 10.0f), std::ref(rng));
38
39 std::vector<float, AlignedAllocator<float, 64>> x(num_elements);
40 std::vector<float, AlignedAllocator<float, 64>> y(num_elements);
41 std::generate(x.begin(), x.end(), std::ref(f32rng));
42 std::fill(y.begin(), y.end(), std::nanf(""));
43
44 union xnn_f32_elu_params params;
45 init_params(¶ms, 1.0f /* prescale */, 1.0f /* alpha */, 1.0f /* beta */);
46 for (auto _ : state) {
47 elu(num_elements * sizeof(float), x.data(), y.data(), ¶ms);
48 }
49
50 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
51 if (cpu_frequency != 0) {
52 state.counters["cpufreq"] = cpu_frequency;
53 }
54
55 const size_t elements_per_iteration = num_elements;
56 state.counters["elements"] =
57 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
58
59 const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
60 state.counters["bytes"] =
61 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
62 }
63
64 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
65 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x4,
66 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
67 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
68 benchmark::utils::CheckNEONFMA)
69 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
70 ->UseRealTime();
71 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x8,
72 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
73 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
74 benchmark::utils::CheckNEONFMA)
75 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
76 ->UseRealTime();
77 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x12,
78 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
79 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
80 benchmark::utils::CheckNEONFMA)
81 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
82 ->UseRealTime();
83 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x16,
84 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
85 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
86 benchmark::utils::CheckNEONFMA)
87 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
88 ->UseRealTime();
89 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x20,
90 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
91 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
92 benchmark::utils::CheckNEONFMA)
93 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
94 ->UseRealTime();
95 BENCHMARK_CAPTURE(f32_velu, neonfma_lut16_p3_x24,
96 xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
97 xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
98 benchmark::utils::CheckNEONFMA)
99 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
100 ->UseRealTime();
101
102 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x4,
103 xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
104 xnn_init_f32_elu_neonfma_rr1_p6_params,
105 benchmark::utils::CheckNEONFMA)
106 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
107 ->UseRealTime();
108 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x8,
109 xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
110 xnn_init_f32_elu_neonfma_rr1_p6_params,
111 benchmark::utils::CheckNEONFMA)
112 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
113 ->UseRealTime();
114 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x12,
115 xnn_f32_velu_ukernel__neonfma_rr1_p6_x12,
116 xnn_init_f32_elu_neonfma_rr1_p6_params,
117 benchmark::utils::CheckNEONFMA)
118 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
119 ->UseRealTime();
120 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x16,
121 xnn_f32_velu_ukernel__neonfma_rr1_p6_x16,
122 xnn_init_f32_elu_neonfma_rr1_p6_params,
123 benchmark::utils::CheckNEONFMA)
124 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
125 ->UseRealTime();
126 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x20,
127 xnn_f32_velu_ukernel__neonfma_rr1_p6_x20,
128 xnn_init_f32_elu_neonfma_rr1_p6_params,
129 benchmark::utils::CheckNEONFMA)
130 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
131 ->UseRealTime();
132 BENCHMARK_CAPTURE(f32_velu, neonfma_p6_x24,
133 xnn_f32_velu_ukernel__neonfma_rr1_p6_x24,
134 xnn_init_f32_elu_neonfma_rr1_p6_params,
135 benchmark::utils::CheckNEONFMA)
136 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
137 ->UseRealTime();
138
139 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x4,
140 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4,
141 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
142 benchmark::utils::CheckNEON)
143 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
144 ->UseRealTime();
145 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x8,
146 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8,
147 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
148 benchmark::utils::CheckNEON)
149 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
150 ->UseRealTime();
151 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x12,
152 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12,
153 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
154 benchmark::utils::CheckNEON)
155 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
156 ->UseRealTime();
157 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x16,
158 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16,
159 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
160 benchmark::utils::CheckNEON)
161 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
162 ->UseRealTime();
163 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x20,
164 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20,
165 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
166 benchmark::utils::CheckNEON)
167 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
168 ->UseRealTime();
169 BENCHMARK_CAPTURE(f32_velu, neon_lut16_p3_x24,
170 xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24,
171 xnn_init_f32_elu_neon_rr2_lut16_p3_params,
172 benchmark::utils::CheckNEON)
173 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
174 ->UseRealTime();
175
176 BENCHMARK_CAPTURE(f32_velu, neon_p6_x4,
177 xnn_f32_velu_ukernel__neon_rr2_p6_x4,
178 xnn_init_f32_elu_neon_rr2_p6_params,
179 benchmark::utils::CheckNEON)
180 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
181 ->UseRealTime();
182 BENCHMARK_CAPTURE(f32_velu, neon_p6_x8,
183 xnn_f32_velu_ukernel__neon_rr2_p6_x8,
184 xnn_init_f32_elu_neon_rr2_p6_params,
185 benchmark::utils::CheckNEON)
186 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
187 ->UseRealTime();
188 BENCHMARK_CAPTURE(f32_velu, neon_p6_x12,
189 xnn_f32_velu_ukernel__neon_rr2_p6_x12,
190 xnn_init_f32_elu_neon_rr2_p6_params,
191 benchmark::utils::CheckNEON)
192 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
193 ->UseRealTime();
194 BENCHMARK_CAPTURE(f32_velu, neon_p6_x16,
195 xnn_f32_velu_ukernel__neon_rr2_p6_x16,
196 xnn_init_f32_elu_neon_rr2_p6_params,
197 benchmark::utils::CheckNEON)
198 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
199 ->UseRealTime();
200 BENCHMARK_CAPTURE(f32_velu, neon_p6_x20,
201 xnn_f32_velu_ukernel__neon_rr2_p6_x20,
202 xnn_init_f32_elu_neon_rr2_p6_params,
203 benchmark::utils::CheckNEON)
204 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
205 ->UseRealTime();
206 BENCHMARK_CAPTURE(f32_velu, neon_p6_x24,
207 xnn_f32_velu_ukernel__neon_rr2_p6_x24,
208 xnn_init_f32_elu_neon_rr2_p6_params,
209 benchmark::utils::CheckNEON)
210 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
211 ->UseRealTime();
212 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
213
214 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
215 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x16,
216 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
217 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
218 benchmark::utils::CheckAVX512F)
219 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
220 ->UseRealTime();
221 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x32,
222 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
223 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
224 benchmark::utils::CheckAVX512F)
225 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
226 ->UseRealTime();
227 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x48,
228 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
229 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
230 benchmark::utils::CheckAVX512F)
231 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
232 ->UseRealTime();
233 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x64,
234 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
235 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
236 benchmark::utils::CheckAVX512F)
237 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
238 ->UseRealTime();
239 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x80,
240 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
241 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
242 benchmark::utils::CheckAVX512F)
243 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
244 ->UseRealTime();
245 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x96,
246 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
247 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
248 benchmark::utils::CheckAVX512F)
249 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
250 ->UseRealTime();
251 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x112,
252 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
253 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
254 benchmark::utils::CheckAVX512F)
255 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
256 ->UseRealTime();
257 BENCHMARK_CAPTURE(f32_velu, avx512f_lut16_p3_x128,
258 xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
259 xnn_init_f32_elu_avx512_rr1_lut16_p3_params,
260 benchmark::utils::CheckAVX512F)
261 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
262 ->UseRealTime();
263
264 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x16,
265 xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
266 xnn_init_f32_elu_avx512_rr1_p6_params,
267 benchmark::utils::CheckAVX512F)
268 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
269 ->UseRealTime();
270 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x32,
271 xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
272 xnn_init_f32_elu_avx512_rr1_p6_params,
273 benchmark::utils::CheckAVX512F)
274 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
275 ->UseRealTime();
276 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x48,
277 xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
278 xnn_init_f32_elu_avx512_rr1_p6_params,
279 benchmark::utils::CheckAVX512F)
280 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
281 ->UseRealTime();
282 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x64,
283 xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
284 xnn_init_f32_elu_avx512_rr1_p6_params,
285 benchmark::utils::CheckAVX512F)
286 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
287 ->UseRealTime();
288 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x80,
289 xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
290 xnn_init_f32_elu_avx512_rr1_p6_params,
291 benchmark::utils::CheckAVX512F)
292 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
293 ->UseRealTime();
294 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x96,
295 xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
296 xnn_init_f32_elu_avx512_rr1_p6_params,
297 benchmark::utils::CheckAVX512F)
298 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
299 ->UseRealTime();
300 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x112,
301 xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
302 xnn_init_f32_elu_avx512_rr1_p6_params,
303 benchmark::utils::CheckAVX512F)
304 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
305 ->UseRealTime();
306 BENCHMARK_CAPTURE(f32_velu, avx512f_p6_x128,
307 xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
308 xnn_init_f32_elu_avx512_rr1_p6_params,
309 benchmark::utils::CheckAVX512F)
310 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
311 ->UseRealTime();
312
313 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x8,
314 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
315 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
316 benchmark::utils::CheckAVX2)
317 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
318 ->UseRealTime();
319 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x16,
320 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
321 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
322 benchmark::utils::CheckAVX2)
323 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
324 ->UseRealTime();
325 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x24,
326 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
327 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
328 benchmark::utils::CheckAVX2)
329 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
330 ->UseRealTime();
331 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x32,
332 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
333 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
334 benchmark::utils::CheckAVX2)
335 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
336 ->UseRealTime();
337 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x40,
338 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
339 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
340 benchmark::utils::CheckAVX2)
341 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
342 ->UseRealTime();
343 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x48,
344 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
345 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
346 benchmark::utils::CheckAVX2)
347 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
348 ->UseRealTime();
349 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x56,
350 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
351 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
352 benchmark::utils::CheckAVX2)
353 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
354 ->UseRealTime();
355 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x64,
356 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
357 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
358 benchmark::utils::CheckAVX2)
359 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
360 ->UseRealTime();
361 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x72,
362 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
363 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
364 benchmark::utils::CheckAVX2)
365 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
366 ->UseRealTime();
367 BENCHMARK_CAPTURE(f32_velu, avx2_lut4_p4_x80,
368 xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
369 xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
370 benchmark::utils::CheckAVX2)
371 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
372 ->UseRealTime();
373
374 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x8,
375 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
376 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
377 benchmark::utils::CheckAVX2)
378 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
379 ->UseRealTime();
380 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x16,
381 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
382 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
383 benchmark::utils::CheckAVX2)
384 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
385 ->UseRealTime();
386 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x24,
387 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
388 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
389 benchmark::utils::CheckAVX2)
390 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
391 ->UseRealTime();
392 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x32,
393 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
394 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
395 benchmark::utils::CheckAVX2)
396 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
397 ->UseRealTime();
398 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x40,
399 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
400 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
401 benchmark::utils::CheckAVX2)
402 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
403 ->UseRealTime();
404 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x48,
405 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
406 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
407 benchmark::utils::CheckAVX2)
408 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
409 ->UseRealTime();
410 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x56,
411 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
412 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
413 benchmark::utils::CheckAVX2)
414 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
415 ->UseRealTime();
416 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x64,
417 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
418 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
419 benchmark::utils::CheckAVX2)
420 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
421 ->UseRealTime();
422 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x72,
423 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
424 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
425 benchmark::utils::CheckAVX2)
426 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
427 ->UseRealTime();
428 BENCHMARK_CAPTURE(f32_velu, avx2_lut8_p4_x80,
429 xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
430 xnn_init_f32_elu_avx2_rr1_lut8_p4_params,
431 benchmark::utils::CheckAVX2)
432 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
433 ->UseRealTime();
434
435 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x8,
436 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
437 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
438 benchmark::utils::CheckAVX2)
439 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
440 ->UseRealTime();
441 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x16,
442 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
443 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
444 benchmark::utils::CheckAVX2)
445 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
446 ->UseRealTime();
447 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x24,
448 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
449 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
450 benchmark::utils::CheckAVX2)
451 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
452 ->UseRealTime();
453 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x32,
454 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
455 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
456 benchmark::utils::CheckAVX2)
457 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
458 ->UseRealTime();
459 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x40,
460 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
461 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
462 benchmark::utils::CheckAVX2)
463 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
464 ->UseRealTime();
465 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x48,
466 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
467 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
468 benchmark::utils::CheckAVX2)
469 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
470 ->UseRealTime();
471 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x56,
472 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
473 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
474 benchmark::utils::CheckAVX2)
475 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
476 ->UseRealTime();
477 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x64,
478 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
479 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
480 benchmark::utils::CheckAVX2)
481 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
482 ->UseRealTime();
483 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x72,
484 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
485 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
486 benchmark::utils::CheckAVX2)
487 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
488 ->UseRealTime();
489 BENCHMARK_CAPTURE(f32_velu, avx2_lut16_p3_x80,
490 xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
491 xnn_init_f32_elu_avx2_rr1_lut16_p3_params,
492 benchmark::utils::CheckAVX2)
493 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
494 ->UseRealTime();
495
496 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x8,
497 xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
498 xnn_init_f32_elu_avx2_rr1_p6_params,
499 benchmark::utils::CheckAVX2)
500 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
501 ->UseRealTime();
502 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x16,
503 xnn_f32_velu_ukernel__avx2_rr1_p6_x16,
504 xnn_init_f32_elu_avx2_rr1_p6_params,
505 benchmark::utils::CheckAVX2)
506 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
507 ->UseRealTime();
508 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x24,
509 xnn_f32_velu_ukernel__avx2_rr1_p6_x24,
510 xnn_init_f32_elu_avx2_rr1_p6_params,
511 benchmark::utils::CheckAVX2)
512 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
513 ->UseRealTime();
514 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x32,
515 xnn_f32_velu_ukernel__avx2_rr1_p6_x32,
516 xnn_init_f32_elu_avx2_rr1_p6_params,
517 benchmark::utils::CheckAVX2)
518 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
519 ->UseRealTime();
520 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x40,
521 xnn_f32_velu_ukernel__avx2_rr1_p6_x40,
522 xnn_init_f32_elu_avx2_rr1_p6_params,
523 benchmark::utils::CheckAVX2)
524 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
525 ->UseRealTime();
526 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x48,
527 xnn_f32_velu_ukernel__avx2_rr1_p6_x48,
528 xnn_init_f32_elu_avx2_rr1_p6_params,
529 benchmark::utils::CheckAVX2)
530 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
531 ->UseRealTime();
532 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x56,
533 xnn_f32_velu_ukernel__avx2_rr1_p6_x56,
534 xnn_init_f32_elu_avx2_rr1_p6_params,
535 benchmark::utils::CheckAVX2)
536 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
537 ->UseRealTime();
538 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x64,
539 xnn_f32_velu_ukernel__avx2_rr1_p6_x64,
540 xnn_init_f32_elu_avx2_rr1_p6_params,
541 benchmark::utils::CheckAVX2)
542 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
543 ->UseRealTime();
544 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x72,
545 xnn_f32_velu_ukernel__avx2_rr1_p6_x72,
546 xnn_init_f32_elu_avx2_rr1_p6_params,
547 benchmark::utils::CheckAVX2)
548 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
549 ->UseRealTime();
550 BENCHMARK_CAPTURE(f32_velu, avx2_p6_x80,
551 xnn_f32_velu_ukernel__avx2_rr1_p6_x80,
552 xnn_init_f32_elu_avx2_rr1_p6_params,
553 benchmark::utils::CheckAVX2)
554 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
555 ->UseRealTime();
556
557 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x8,
558 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
559 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
560 benchmark::utils::CheckAVX)
561 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
562 ->UseRealTime();
563 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x16,
564 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16,
565 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
566 benchmark::utils::CheckAVX)
567 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
568 ->UseRealTime();
569 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x24,
570 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24,
571 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
572 benchmark::utils::CheckAVX)
573 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
574 ->UseRealTime();
575 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x32,
576 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32,
577 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
578 benchmark::utils::CheckAVX)
579 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
580 ->UseRealTime();
581 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x40,
582 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40,
583 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
584 benchmark::utils::CheckAVX)
585 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
586 ->UseRealTime();
587 BENCHMARK_CAPTURE(f32_velu, avx_lut4_p4_x48,
588 xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48,
589 xnn_init_f32_elu_avx_rr2_lut4_p4_params,
590 benchmark::utils::CheckAVX)
591 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
592 ->UseRealTime();
593
594 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x8,
595 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8,
596 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
597 benchmark::utils::CheckAVX)
598 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
599 ->UseRealTime();
600 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x16,
601 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16,
602 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
603 benchmark::utils::CheckAVX)
604 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
605 ->UseRealTime();
606 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x24,
607 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24,
608 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
609 benchmark::utils::CheckAVX)
610 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
611 ->UseRealTime();
612 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x32,
613 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32,
614 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
615 benchmark::utils::CheckAVX)
616 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
617 ->UseRealTime();
618 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x40,
619 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40,
620 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
621 benchmark::utils::CheckAVX)
622 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
623 ->UseRealTime();
624 BENCHMARK_CAPTURE(f32_velu, avx_lut16_p3_x48,
625 xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
626 xnn_init_f32_elu_avx_rr2_lut16_p3_params,
627 benchmark::utils::CheckAVX)
628 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
629 ->UseRealTime();
630
631 BENCHMARK_CAPTURE(f32_velu, avx_p6_x8,
632 xnn_f32_velu_ukernel__avx_rr2_p6_x8,
633 xnn_init_f32_elu_avx_rr2_p6_params,
634 benchmark::utils::CheckAVX)
635 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
636 ->UseRealTime();
637 BENCHMARK_CAPTURE(f32_velu, avx_p6_x16,
638 xnn_f32_velu_ukernel__avx_rr2_p6_x16,
639 xnn_init_f32_elu_avx_rr2_p6_params,
640 benchmark::utils::CheckAVX)
641 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
642 ->UseRealTime();
643 BENCHMARK_CAPTURE(f32_velu, avx_p6_x24,
644 xnn_f32_velu_ukernel__avx_rr2_p6_x24,
645 xnn_init_f32_elu_avx_rr2_p6_params,
646 benchmark::utils::CheckAVX)
647 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
648 ->UseRealTime();
649 BENCHMARK_CAPTURE(f32_velu, avx_p6_x32,
650 xnn_f32_velu_ukernel__avx_rr2_p6_x32,
651 xnn_init_f32_elu_avx_rr2_p6_params,
652 benchmark::utils::CheckAVX)
653 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
654 ->UseRealTime();
655 BENCHMARK_CAPTURE(f32_velu, avx_p6_x40,
656 xnn_f32_velu_ukernel__avx_rr2_p6_x40,
657 xnn_init_f32_elu_avx_rr2_p6_params,
658 benchmark::utils::CheckAVX)
659 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
660 ->UseRealTime();
661 BENCHMARK_CAPTURE(f32_velu, avx_p6_x48,
662 xnn_f32_velu_ukernel__avx_rr2_p6_x48,
663 xnn_init_f32_elu_avx_rr2_p6_params,
664 benchmark::utils::CheckAVX)
665 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
666 ->UseRealTime();
667
668 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x4,
669 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4,
670 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
671 benchmark::utils::CheckSSE41)
672 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
673 ->UseRealTime();
674 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x8,
675 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8,
676 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
677 benchmark::utils::CheckSSE41)
678 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
679 ->UseRealTime();
680 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x12,
681 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12,
682 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
683 benchmark::utils::CheckSSE41)
684 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
685 ->UseRealTime();
686 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x16,
687 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
688 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
689 benchmark::utils::CheckSSE41)
690 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
691 ->UseRealTime();
692 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x20,
693 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
694 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
695 benchmark::utils::CheckSSE41)
696 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
697 ->UseRealTime();
698 BENCHMARK_CAPTURE(f32_velu, sse41_lut16_p3_x24,
699 xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
700 xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
701 benchmark::utils::CheckSSE41)
702 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
703 ->UseRealTime();
704
705 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x4,
706 xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
707 xnn_init_f32_elu_sse2_rr2_p6_params,
708 benchmark::utils::CheckSSE41)
709 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
710 ->UseRealTime();
711 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x8,
712 xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
713 xnn_init_f32_elu_sse2_rr2_p6_params,
714 benchmark::utils::CheckSSE41)
715 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
716 ->UseRealTime();
717 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x12,
718 xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
719 xnn_init_f32_elu_sse2_rr2_p6_params,
720 benchmark::utils::CheckSSE41)
721 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
722 ->UseRealTime();
723 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x16,
724 xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
725 xnn_init_f32_elu_sse2_rr2_p6_params,
726 benchmark::utils::CheckSSE41)
727 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
728 ->UseRealTime();
729 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x20,
730 xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
731 xnn_init_f32_elu_sse2_rr2_p6_params,
732 benchmark::utils::CheckSSE41)
733 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
734 ->UseRealTime();
735 BENCHMARK_CAPTURE(f32_velu, sse41_p6_x24,
736 xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
737 xnn_init_f32_elu_sse2_rr2_p6_params,
738 benchmark::utils::CheckSSE41)
739 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
740 ->UseRealTime();
741
742 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x4,
743 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4,
744 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
745 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
746 ->UseRealTime();
747 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x8,
748 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8,
749 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
750 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
751 ->UseRealTime();
752 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x12,
753 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12,
754 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
755 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
756 ->UseRealTime();
757 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x16,
758 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16,
759 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
760 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
761 ->UseRealTime();
762 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x20,
763 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20,
764 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
765 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
766 ->UseRealTime();
767 BENCHMARK_CAPTURE(f32_velu, sse2_lut16_p3_x24,
768 xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24,
769 xnn_init_f32_elu_sse2_rr2_lut16_p3_params)
770 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
771 ->UseRealTime();
772
773 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x4,
774 xnn_f32_velu_ukernel__sse2_rr2_p6_x4,
775 xnn_init_f32_elu_sse2_rr2_p6_params)
776 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
777 ->UseRealTime();
778 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x8,
779 xnn_f32_velu_ukernel__sse2_rr2_p6_x8,
780 xnn_init_f32_elu_sse2_rr2_p6_params)
781 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
782 ->UseRealTime();
783 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x12,
784 xnn_f32_velu_ukernel__sse2_rr2_p6_x12,
785 xnn_init_f32_elu_sse2_rr2_p6_params)
786 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
787 ->UseRealTime();
788 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x16,
789 xnn_f32_velu_ukernel__sse2_rr2_p6_x16,
790 xnn_init_f32_elu_sse2_rr2_p6_params)
791 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
792 ->UseRealTime();
793 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x20,
794 xnn_f32_velu_ukernel__sse2_rr2_p6_x20,
795 xnn_init_f32_elu_sse2_rr2_p6_params)
796 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
797 ->UseRealTime();
798 BENCHMARK_CAPTURE(f32_velu, sse2_p6_x24,
799 xnn_f32_velu_ukernel__sse2_rr2_p6_x24,
800 xnn_init_f32_elu_sse2_rr2_p6_params)
801 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
802 ->UseRealTime();
803 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
804
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// WebAssembly SIMD ELU kernels. These registrations are compiled only when
// XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set. None of them passes
// an ISA check, so f32_velu's isa_check keeps its nullptr default. Every
// registration has its arguments configured by
// UnaryElementwiseParameters<float, float> and reports wall-clock time.
//
// Kernels come in `arm` and `x86` flavours (presumably code paths tuned for
// WebAssembly engines running on the respective host CPUs -- confirm); both
// flavours of a given algorithm share one params initializer.

// wasmsimd `arm` flavour, rr2_lut16_p3 algorithm, variants x4 .. x24.
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x4,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x8,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x12,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x16,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x20,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_lut16_p3_x24,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// wasmsimd `x86` flavour, rr2_lut16_p3 algorithm, variants x4 .. x24.
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x4,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x8,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x12,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x16,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x20,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_lut16_p3_x24,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// wasmsimd `arm` flavour, rr2_p6 algorithm, variants x4 .. x24.
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x4,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x8,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x12,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x16,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x20,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_arm_p6_x24,
                  xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// wasmsimd `x86` flavour, rr2_p6 algorithm, variants x4 .. x24.
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x4,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x8,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x12,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x16,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x20,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasmsimd_x86_p6_x24,
                  xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
930
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Non-SIMD WebAssembly ELU kernels (`wasm_*`), compiled for any WebAssembly
// target flavour (plain, SIMD, or relaxed SIMD). These kernels are paired
// with the *scalar* params initializers. No ISA check is passed, so
// f32_velu's isa_check keeps its nullptr default. Every registration has its
// arguments configured by UnaryElementwiseParameters<float, float> and
// reports wall-clock time.

// wasm rr2_lut16_p3 algorithm, variants x1 .. x6.
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x1,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x2,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x3,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x4,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x5,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_lut16_p3_x6,
                  xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// wasm rr2_p6 algorithm, variants x1 .. x6.
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x1,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x2,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x3,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x4,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x5,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(f32_velu, wasm_p6_x6,
                  xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
                  xnn_init_f32_elu_scalar_rr2_p6_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
994
995 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x1,
996 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
997 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
998 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
999 ->UseRealTime();
1000 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x2,
1001 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
1002 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
1003 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1004 ->UseRealTime();
1005 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x3,
1006 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
1007 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
1008 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1009 ->UseRealTime();
1010 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x4,
1011 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
1012 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
1013 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1014 ->UseRealTime();
1015 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x5,
1016 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
1017 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
1018 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1019 ->UseRealTime();
1020 BENCHMARK_CAPTURE(f32_velu, scalar_lut16_p3_x6,
1021 xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
1022 xnn_init_f32_elu_scalar_rr2_lut16_p3_params)
1023 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1024 ->UseRealTime();
1025
1026 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x1,
1027 xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
1028 xnn_init_f32_elu_scalar_rr2_p6_params)
1029 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1030 ->UseRealTime();
1031 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x2,
1032 xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
1033 xnn_init_f32_elu_scalar_rr2_p6_params)
1034 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1035 ->UseRealTime();
1036 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x3,
1037 xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
1038 xnn_init_f32_elu_scalar_rr2_p6_params)
1039 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1040 ->UseRealTime();
1041 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x4,
1042 xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
1043 xnn_init_f32_elu_scalar_rr2_p6_params)
1044 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1045 ->UseRealTime();
1046 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x5,
1047 xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
1048 xnn_init_f32_elu_scalar_rr2_p6_params)
1049 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1050 ->UseRealTime();
1051 BENCHMARK_CAPTURE(f32_velu, scalar_p6_x6,
1052 xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
1053 xnn_init_f32_elu_scalar_rr2_p6_params)
1054 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1055 ->UseRealTime();
1056
1057 #ifndef XNNPACK_BENCHMARK_NO_MAIN
1058 BENCHMARK_MAIN();
1059 #endif
1060