1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14
15 #include <xnnpack.h>
16 #include <xnnpack/aligned-allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/microfnptr.h>
19 #include <xnnpack/microparams-init.h>
20 #include <xnnpack/vunary.h>
21
22
f32_vsigmoid(benchmark::State & state,xnn_f32_vsigmoid_ukernel_function sigmoid,xnn_init_f32_sigmoid_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)23 static void f32_vsigmoid(
24 benchmark::State& state,
25 xnn_f32_vsigmoid_ukernel_function sigmoid,
26 xnn_init_f32_sigmoid_params_fn init_params,
27 benchmark::utils::IsaCheckFunction isa_check = nullptr)
28 {
29 if (isa_check && !isa_check(state)) {
30 return;
31 }
32
33 const size_t num_elements = state.range(0);
34
35 std::random_device random_device;
36 auto rng = std::mt19937(random_device());
37 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
38
39 std::vector<float, AlignedAllocator<float, 64>> x(num_elements);
40 std::vector<float, AlignedAllocator<float, 64>> y(num_elements);
41 std::generate(x.begin(), x.end(), std::ref(f32rng));
42 std::fill(y.begin(), y.end(), std::nanf(""));
43
44 xnn_f32_sigmoid_params params;
45 init_params(¶ms);
46 for (auto _ : state) {
47 sigmoid(num_elements * sizeof(float), x.data(), y.data(), ¶ms);
48 }
49
50 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
51 if (cpu_frequency != 0) {
52 state.counters["cpufreq"] = cpu_frequency;
53 }
54
55 const size_t elements_per_iteration = num_elements;
56 state.counters["elements"] =
57 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
58
59 const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
60 state.counters["bytes"] =
61 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
62 }
63
64 #if XNN_ARCH_ARM64
65 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x4,
66 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
67 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
68 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
69 ->UseRealTime();
70 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x8,
71 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
72 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
73 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
74 ->UseRealTime();
75 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x12,
76 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
77 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
78 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
79 ->UseRealTime();
80 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x16,
81 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
82 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
83 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
84 ->UseRealTime();
85 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x20,
86 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
87 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
88 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
89 ->UseRealTime();
90 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_div_x24,
91 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
92 xnn_init_f32_sigmoid_neonfma_rr1_p5_params)
93 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
94 ->UseRealTime();
95
96 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x4,
97 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
98 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
99 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
100 ->UseRealTime();
101 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x8,
102 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
103 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
104 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
105 ->UseRealTime();
106 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x12,
107 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
108 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
109 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
110 ->UseRealTime();
111 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x16,
112 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
113 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
114 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
115 ->UseRealTime();
116 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x20,
117 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
118 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
119 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
120 ->UseRealTime();
121 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_div_x24,
122 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
123 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params)
124 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
125 ->UseRealTime();
126
127 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x4,
128 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
129 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
130 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
131 ->UseRealTime();
132 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x8,
133 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
134 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
135 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
136 ->UseRealTime();
137 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x12,
138 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
139 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
140 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
141 ->UseRealTime();
142 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x16,
143 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
144 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
145 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
146 ->UseRealTime();
147 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x20,
148 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
149 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
150 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
151 ->UseRealTime();
152 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_div_x24,
153 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
154 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params)
155 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
156 ->UseRealTime();
157 #endif // XNN_ARCH_ARM64
158
159 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
160 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x4,
161 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4,
162 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
163 benchmark::utils::CheckNEONFMA)
164 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
165 ->UseRealTime();
166 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x8,
167 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8,
168 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
169 benchmark::utils::CheckNEONFMA)
170 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
171 ->UseRealTime();
172 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x12,
173 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12,
174 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
175 benchmark::utils::CheckNEONFMA)
176 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
177 ->UseRealTime();
178 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x16,
179 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16,
180 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
181 benchmark::utils::CheckNEONFMA)
182 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
183 ->UseRealTime();
184 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x20,
185 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20,
186 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
187 benchmark::utils::CheckNEONFMA)
188 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
189 ->UseRealTime();
190 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x24,
191 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24,
192 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
193 benchmark::utils::CheckNEONFMA)
194 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
195 ->UseRealTime();
196
197 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x4,
198 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
199 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
200 benchmark::utils::CheckNEONFMA)
201 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
202 ->UseRealTime();
203 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x8,
204 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
205 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
206 benchmark::utils::CheckNEONFMA)
207 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
208 ->UseRealTime();
209 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x12,
210 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12,
211 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
212 benchmark::utils::CheckNEONFMA)
213 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
214 ->UseRealTime();
215 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x16,
216 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16,
217 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
218 benchmark::utils::CheckNEONFMA)
219 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
220 ->UseRealTime();
221 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x20,
222 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20,
223 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
224 benchmark::utils::CheckNEONFMA)
225 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
226 ->UseRealTime();
227 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x24,
228 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24,
229 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
230 benchmark::utils::CheckNEONFMA)
231 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
232 ->UseRealTime();
233
234 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x4,
235 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4,
236 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
237 benchmark::utils::CheckNEONFMA)
238 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
239 ->UseRealTime();
240 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x8,
241 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8,
242 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
243 benchmark::utils::CheckNEONFMA)
244 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
245 ->UseRealTime();
246 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x12,
247 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12,
248 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
249 benchmark::utils::CheckNEONFMA)
250 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
251 ->UseRealTime();
252 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x16,
253 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16,
254 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
255 benchmark::utils::CheckNEONFMA)
256 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
257 ->UseRealTime();
258 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x20,
259 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20,
260 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
261 benchmark::utils::CheckNEONFMA)
262 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
263 ->UseRealTime();
264 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x24,
265 xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
266 xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
267 benchmark::utils::CheckNEONFMA)
268 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
269 ->UseRealTime();
270
271 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x4,
272 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
273 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
274 benchmark::utils::CheckNEON)
275 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
276 ->UseRealTime();
277 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x8,
278 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
279 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
280 benchmark::utils::CheckNEON)
281 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
282 ->UseRealTime();
283 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x12,
284 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
285 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
286 benchmark::utils::CheckNEON)
287 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
288 ->UseRealTime();
289 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x16,
290 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
291 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
292 benchmark::utils::CheckNEON)
293 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
294 ->UseRealTime();
295 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x20,
296 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
297 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
298 benchmark::utils::CheckNEON)
299 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
300 ->UseRealTime();
301 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x24,
302 xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
303 xnn_init_f32_sigmoid_scalar_rr2_p5_params,
304 benchmark::utils::CheckNEON)
305 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
306 ->UseRealTime();
307
308 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x4,
309 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
310 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
311 benchmark::utils::CheckNEONFMA)
312 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
313 ->UseRealTime();
314 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x8,
315 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
316 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
317 benchmark::utils::CheckNEONFMA)
318 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
319 ->UseRealTime();
320 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x12,
321 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
322 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
323 benchmark::utils::CheckNEONFMA)
324 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
325 ->UseRealTime();
326 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x16,
327 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
328 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
329 benchmark::utils::CheckNEONFMA)
330 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
331 ->UseRealTime();
332 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x20,
333 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
334 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
335 benchmark::utils::CheckNEONFMA)
336 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
337 ->UseRealTime();
338 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x24,
339 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
340 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
341 benchmark::utils::CheckNEONFMA)
342 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
343 ->UseRealTime();
344
345 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x4,
346 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
347 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
348 benchmark::utils::CheckNEONFMA)
349 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
350 ->UseRealTime();
351 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x8,
352 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
353 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
354 benchmark::utils::CheckNEONFMA)
355 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
356 ->UseRealTime();
357 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x12,
358 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
359 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
360 benchmark::utils::CheckNEONFMA)
361 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
362 ->UseRealTime();
363 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x16,
364 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
365 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
366 benchmark::utils::CheckNEONFMA)
367 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
368 ->UseRealTime();
369 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x20,
370 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
371 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
372 benchmark::utils::CheckNEONFMA)
373 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
374 ->UseRealTime();
375 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x24,
376 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
377 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
378 benchmark::utils::CheckNEONFMA)
379 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
380 ->UseRealTime();
381
382 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x4,
383 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
384 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
385 benchmark::utils::CheckNEONFMA)
386 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
387 ->UseRealTime();
388 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x8,
389 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
390 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
391 benchmark::utils::CheckNEONFMA)
392 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
393 ->UseRealTime();
394 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x12,
395 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
396 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
397 benchmark::utils::CheckNEONFMA)
398 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
399 ->UseRealTime();
400 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x16,
401 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
402 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
403 benchmark::utils::CheckNEONFMA)
404 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
405 ->UseRealTime();
406 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x20,
407 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
408 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
409 benchmark::utils::CheckNEONFMA)
410 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
411 ->UseRealTime();
412 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x24,
413 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
414 xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
415 benchmark::utils::CheckNEONFMA)
416 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
417 ->UseRealTime();
418
419 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x4,
420 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
421 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
422 benchmark::utils::CheckNEON)
423 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
424 ->UseRealTime();
425 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x8,
426 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
427 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
428 benchmark::utils::CheckNEON)
429 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
430 ->UseRealTime();
431 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x12,
432 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
433 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
434 benchmark::utils::CheckNEON)
435 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
436 ->UseRealTime();
437 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x16,
438 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
439 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
440 benchmark::utils::CheckNEON)
441 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
442 ->UseRealTime();
443 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x20,
444 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
445 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
446 benchmark::utils::CheckNEON)
447 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
448 ->UseRealTime();
449 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x24,
450 xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
451 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
452 benchmark::utils::CheckNEON)
453 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
454 ->UseRealTime();
455
456 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x4,
457 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
458 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
459 benchmark::utils::CheckNEONFMA)
460 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
461 ->UseRealTime();
462 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x8,
463 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
464 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
465 benchmark::utils::CheckNEONFMA)
466 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
467 ->UseRealTime();
468 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x12,
469 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
470 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
471 benchmark::utils::CheckNEONFMA)
472 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
473 ->UseRealTime();
474 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x16,
475 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
476 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
477 benchmark::utils::CheckNEONFMA)
478 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
479 ->UseRealTime();
480 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x20,
481 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
482 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
483 benchmark::utils::CheckNEONFMA)
484 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
485 ->UseRealTime();
486 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x24,
487 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
488 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
489 benchmark::utils::CheckNEONFMA)
490 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
491 ->UseRealTime();
492
493 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
494 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
495 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
496 benchmark::utils::CheckNEONFMA)
497 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
498 ->UseRealTime();
499 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
500 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
501 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
502 benchmark::utils::CheckNEONFMA)
503 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
504 ->UseRealTime();
505 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
506 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
507 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
508 benchmark::utils::CheckNEONFMA)
509 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
510 ->UseRealTime();
511 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
512 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
513 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
514 benchmark::utils::CheckNEONFMA)
515 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
516 ->UseRealTime();
517 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
518 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
519 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
520 benchmark::utils::CheckNEONFMA)
521 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
522 ->UseRealTime();
523 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
524 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
525 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
526 benchmark::utils::CheckNEONFMA)
527 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
528 ->UseRealTime();
529
530 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x4,
531 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
532 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
533 benchmark::utils::CheckNEONFMA)
534 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
535 ->UseRealTime();
536 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x8,
537 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
538 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
539 benchmark::utils::CheckNEONFMA)
540 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
541 ->UseRealTime();
542 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x12,
543 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
544 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
545 benchmark::utils::CheckNEONFMA)
546 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
547 ->UseRealTime();
548 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x16,
549 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
550 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
551 benchmark::utils::CheckNEONFMA)
552 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
553 ->UseRealTime();
554 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x20,
555 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
556 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
557 benchmark::utils::CheckNEONFMA)
558 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
559 ->UseRealTime();
560 BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x24,
561 xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
562 xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
563 benchmark::utils::CheckNEONFMA)
564 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
565 ->UseRealTime();
566
567 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x4,
568 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
569 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
570 benchmark::utils::CheckNEON)
571 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
572 ->UseRealTime();
573 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x8,
574 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
575 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
576 benchmark::utils::CheckNEON)
577 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
578 ->UseRealTime();
579 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x12,
580 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
581 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
582 benchmark::utils::CheckNEON)
583 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
584 ->UseRealTime();
585 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x16,
586 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
587 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
588 benchmark::utils::CheckNEON)
589 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
590 ->UseRealTime();
591 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x20,
592 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
593 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
594 benchmark::utils::CheckNEON)
595 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
596 ->UseRealTime();
597 BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x24,
598 xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
599 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params,
600 benchmark::utils::CheckNEON)
601 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
602 ->UseRealTime();
603 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
604
605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
606 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x16,
607 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
608 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
609 benchmark::utils::CheckAVX512F)
610 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
611 ->UseRealTime();
612 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x32,
613 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
614 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
615 benchmark::utils::CheckAVX512F)
616 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
617 ->UseRealTime();
618 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x48,
619 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48,
620 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
621 benchmark::utils::CheckAVX512F)
622 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
623 ->UseRealTime();
624 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x64,
625 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64,
626 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
627 benchmark::utils::CheckAVX512F)
628 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
629 ->UseRealTime();
630 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x80,
631 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80,
632 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
633 benchmark::utils::CheckAVX512F)
634 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
635 ->UseRealTime();
636 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x96,
637 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96,
638 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
639 benchmark::utils::CheckAVX512F)
640 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
641 ->UseRealTime();
642 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x112,
643 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112,
644 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
645 benchmark::utils::CheckAVX512F)
646 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
647 ->UseRealTime();
648 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x128,
649 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128,
650 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
651 benchmark::utils::CheckAVX512F)
652 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
653 ->UseRealTime();
654
655 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x16,
656 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16,
657 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
658 benchmark::utils::CheckAVX512F)
659 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
660 ->UseRealTime();
661 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x32,
662 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32,
663 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
664 benchmark::utils::CheckAVX512F)
665 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
666 ->UseRealTime();
667 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x48,
668 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48,
669 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
670 benchmark::utils::CheckAVX512F)
671 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
672 ->UseRealTime();
673 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x64,
674 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64,
675 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
676 benchmark::utils::CheckAVX512F)
677 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
678 ->UseRealTime();
679 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x80,
680 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80,
681 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
682 benchmark::utils::CheckAVX512F)
683 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
684 ->UseRealTime();
685 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x96,
686 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96,
687 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
688 benchmark::utils::CheckAVX512F)
689 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
690 ->UseRealTime();
691 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x112,
692 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112,
693 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
694 benchmark::utils::CheckAVX512F)
695 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
696 ->UseRealTime();
697 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x128,
698 xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128,
699 xnn_init_f32_sigmoid_avx512_rr1_p5_params,
700 benchmark::utils::CheckAVX512F)
701 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
702 ->UseRealTime();
703
704 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x16,
705 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
706 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
707 benchmark::utils::CheckAVX512F)
708 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
709 ->UseRealTime();
710 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x32,
711 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
712 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
713 benchmark::utils::CheckAVX512F)
714 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
715 ->UseRealTime();
716 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x48,
717 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
718 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
719 benchmark::utils::CheckAVX512F)
720 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
721 ->UseRealTime();
722 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x64,
723 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
724 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
725 benchmark::utils::CheckAVX512F)
726 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
727 ->UseRealTime();
728 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x80,
729 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
730 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
731 benchmark::utils::CheckAVX512F)
732 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
733 ->UseRealTime();
734 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x96,
735 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
736 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
737 benchmark::utils::CheckAVX512F)
738 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
739 ->UseRealTime();
740 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x112,
741 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
742 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
743 benchmark::utils::CheckAVX512F)
744 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
745 ->UseRealTime();
746 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x128,
747 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
748 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
749 benchmark::utils::CheckAVX512F)
750 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
751 ->UseRealTime();
752
753 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x16,
754 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
755 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
756 benchmark::utils::CheckAVX512F)
757 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
758 ->UseRealTime();
759 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x32,
760 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
761 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
762 benchmark::utils::CheckAVX512F)
763 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
764 ->UseRealTime();
765 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x48,
766 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
767 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
768 benchmark::utils::CheckAVX512F)
769 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
770 ->UseRealTime();
771 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x64,
772 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
773 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
774 benchmark::utils::CheckAVX512F)
775 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
776 ->UseRealTime();
777 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x80,
778 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
779 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
780 benchmark::utils::CheckAVX512F)
781 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
782 ->UseRealTime();
783 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x96,
784 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
785 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
786 benchmark::utils::CheckAVX512F)
787 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
788 ->UseRealTime();
789 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x112,
790 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
791 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
792 benchmark::utils::CheckAVX512F)
793 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
794 ->UseRealTime();
795 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x128,
796 xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
797 xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
798 benchmark::utils::CheckAVX512F)
799 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
800 ->UseRealTime();
801
802 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x16,
803 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16,
804 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
805 benchmark::utils::CheckAVX512F)
806 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
807 ->UseRealTime();
808 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x32,
809 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
810 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
811 benchmark::utils::CheckAVX512F)
812 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
813 ->UseRealTime();
814 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x48,
815 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
816 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
817 benchmark::utils::CheckAVX512F)
818 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
819 ->UseRealTime();
820 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x64,
821 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
822 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
823 benchmark::utils::CheckAVX512F)
824 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
825 ->UseRealTime();
826 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x80,
827 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
828 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
829 benchmark::utils::CheckAVX512F)
830 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
831 ->UseRealTime();
832 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x96,
833 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
834 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
835 benchmark::utils::CheckAVX512F)
836 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
837 ->UseRealTime();
838 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x112,
839 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
840 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
841 benchmark::utils::CheckAVX512F)
842 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
843 ->UseRealTime();
844 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x128,
845 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
846 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
847 benchmark::utils::CheckAVX512F)
848 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
849 ->UseRealTime();
850
851 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x16,
852 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
853 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
854 benchmark::utils::CheckAVX512F)
855 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
856 ->UseRealTime();
857 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x32,
858 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
859 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
860 benchmark::utils::CheckAVX512F)
861 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
862 ->UseRealTime();
863 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x48,
864 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
865 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
866 benchmark::utils::CheckAVX512F)
867 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
868 ->UseRealTime();
869 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x64,
870 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
871 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
872 benchmark::utils::CheckAVX512F)
873 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
874 ->UseRealTime();
875 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x80,
876 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
877 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
878 benchmark::utils::CheckAVX512F)
879 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
880 ->UseRealTime();
881 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x96,
882 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
883 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
884 benchmark::utils::CheckAVX512F)
885 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
886 ->UseRealTime();
887 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x112,
888 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
889 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
890 benchmark::utils::CheckAVX512F)
891 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
892 ->UseRealTime();
893 BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x128,
894 xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
895 xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
896 benchmark::utils::CheckAVX512F)
897 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
898 ->UseRealTime();
899
900 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x8,
901 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
902 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
903 benchmark::utils::CheckAVX2)
904 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
905 ->UseRealTime();
906 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x16,
907 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
908 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
909 benchmark::utils::CheckAVX2)
910 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
911 ->UseRealTime();
912 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x24,
913 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
914 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
915 benchmark::utils::CheckAVX2)
916 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
917 ->UseRealTime();
918 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x32,
919 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32,
920 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
921 benchmark::utils::CheckAVX2)
922 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
923 ->UseRealTime();
924 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x40,
925 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40,
926 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
927 benchmark::utils::CheckAVX2)
928 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
929 ->UseRealTime();
930 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x48,
931 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48,
932 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
933 benchmark::utils::CheckAVX2)
934 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
935 ->UseRealTime();
936 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x56,
937 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56,
938 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
939 benchmark::utils::CheckAVX2)
940 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
941 ->UseRealTime();
942 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x64,
943 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64,
944 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
945 benchmark::utils::CheckAVX2)
946 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
947 ->UseRealTime();
948 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x72,
949 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72,
950 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
951 benchmark::utils::CheckAVX2)
952 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
953 ->UseRealTime();
954 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x80,
955 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80,
956 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
957 benchmark::utils::CheckAVX2)
958 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
959 ->UseRealTime();
960
961 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x8,
962 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8,
963 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
964 benchmark::utils::CheckAVX2)
965 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
966 ->UseRealTime();
967 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x16,
968 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16,
969 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
970 benchmark::utils::CheckAVX2)
971 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
972 ->UseRealTime();
973 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x24,
974 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24,
975 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
976 benchmark::utils::CheckAVX2)
977 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
978 ->UseRealTime();
979 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x32,
980 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32,
981 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
982 benchmark::utils::CheckAVX2)
983 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
984 ->UseRealTime();
985 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x40,
986 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40,
987 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
988 benchmark::utils::CheckAVX2)
989 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
990 ->UseRealTime();
991 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x48,
992 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48,
993 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
994 benchmark::utils::CheckAVX2)
995 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
996 ->UseRealTime();
997 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x56,
998 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56,
999 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1000 benchmark::utils::CheckAVX2)
1001 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1002 ->UseRealTime();
1003 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x64,
1004 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64,
1005 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1006 benchmark::utils::CheckAVX2)
1007 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1008 ->UseRealTime();
1009 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x72,
1010 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72,
1011 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1012 benchmark::utils::CheckAVX2)
1013 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1014 ->UseRealTime();
1015 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x80,
1016 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80,
1017 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1018 benchmark::utils::CheckAVX2)
1019 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1020 ->UseRealTime();
1021
1022 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x8,
1023 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
1024 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1025 benchmark::utils::CheckAVX2)
1026 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1027 ->UseRealTime();
1028 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x16,
1029 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
1030 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1031 benchmark::utils::CheckAVX2)
1032 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1033 ->UseRealTime();
1034 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x24,
1035 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
1036 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1037 benchmark::utils::CheckAVX2)
1038 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1039 ->UseRealTime();
1040 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x32,
1041 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
1042 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1043 benchmark::utils::CheckAVX2)
1044 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1045 ->UseRealTime();
1046 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x40,
1047 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
1048 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1049 benchmark::utils::CheckAVX2)
1050 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1051 ->UseRealTime();
1052 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x48,
1053 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
1054 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1055 benchmark::utils::CheckAVX2)
1056 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1057 ->UseRealTime();
1058 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x56,
1059 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
1060 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1061 benchmark::utils::CheckAVX2)
1062 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1063 ->UseRealTime();
1064 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x64,
1065 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
1066 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1067 benchmark::utils::CheckAVX2)
1068 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1069 ->UseRealTime();
1070 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x72,
1071 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
1072 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1073 benchmark::utils::CheckAVX2)
1074 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1075 ->UseRealTime();
1076 BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x80,
1077 xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
1078 xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1079 benchmark::utils::CheckAVX2)
1080 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1081 ->UseRealTime();
1082
1083 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x8,
1084 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
1085 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1086 benchmark::utils::CheckAVX)
1087 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1088 ->UseRealTime();
1089 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x16,
1090 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
1091 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1092 benchmark::utils::CheckAVX)
1093 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1094 ->UseRealTime();
1095 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x24,
1096 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
1097 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1098 benchmark::utils::CheckAVX)
1099 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1100 ->UseRealTime();
1101 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x32,
1102 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
1103 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1104 benchmark::utils::CheckAVX)
1105 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1106 ->UseRealTime();
1107 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x40,
1108 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
1109 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1110 benchmark::utils::CheckAVX)
1111 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1112 ->UseRealTime();
1113 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x48,
1114 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
1115 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1116 benchmark::utils::CheckAVX)
1117 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1118 ->UseRealTime();
1119 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x56,
1120 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
1121 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1122 benchmark::utils::CheckAVX)
1123 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1124 ->UseRealTime();
1125 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x64,
1126 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
1127 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1128 benchmark::utils::CheckAVX)
1129 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1130 ->UseRealTime();
1131 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x72,
1132 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
1133 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1134 benchmark::utils::CheckAVX)
1135 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1136 ->UseRealTime();
1137 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x80,
1138 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
1139 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1140 benchmark::utils::CheckAVX)
1141 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1142 ->UseRealTime();
1143 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x8,
1144 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
1145 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1146 benchmark::utils::CheckAVX)
1147 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1148 ->UseRealTime();
1149 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x16,
1150 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
1151 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1152 benchmark::utils::CheckAVX)
1153 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1154 ->UseRealTime();
1155 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x24,
1156 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
1157 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1158 benchmark::utils::CheckAVX)
1159 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1160 ->UseRealTime();
1161 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x32,
1162 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
1163 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1164 benchmark::utils::CheckAVX)
1165 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1166 ->UseRealTime();
1167 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x40,
1168 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
1169 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1170 benchmark::utils::CheckAVX)
1171 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1172 ->UseRealTime();
1173 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x48,
1174 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
1175 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1176 benchmark::utils::CheckAVX)
1177 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1178 ->UseRealTime();
1179 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x56,
1180 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
1181 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1182 benchmark::utils::CheckAVX)
1183 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1184 ->UseRealTime();
1185 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x64,
1186 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
1187 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1188 benchmark::utils::CheckAVX)
1189 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1190 ->UseRealTime();
1191 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x72,
1192 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
1193 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1194 benchmark::utils::CheckAVX)
1195 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1196 ->UseRealTime();
1197 BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x80,
1198 xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
1199 xnn_init_f32_sigmoid_avx_rr2_p5_params,
1200 benchmark::utils::CheckAVX)
1201 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1202 ->UseRealTime();
1203
1204 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x4,
1205 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
1206 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1207 benchmark::utils::CheckSSE41)
1208 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1209 ->UseRealTime();
1210 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x8,
1211 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
1212 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1213 benchmark::utils::CheckSSE41)
1214 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1215 ->UseRealTime();
1216 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x12,
1217 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
1218 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1219 benchmark::utils::CheckSSE41)
1220 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1221 ->UseRealTime();
1222 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x16,
1223 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
1224 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1225 benchmark::utils::CheckSSE41)
1226 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1227 ->UseRealTime();
1228 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x20,
1229 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
1230 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1231 benchmark::utils::CheckSSE41)
1232 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1233 ->UseRealTime();
1234 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x24,
1235 xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
1236 xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1237 benchmark::utils::CheckSSE41)
1238 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1239 ->UseRealTime();
1240
1241 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x4,
1242 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
1243 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1244 benchmark::utils::CheckSSE41)
1245 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1246 ->UseRealTime();
1247 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x8,
1248 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
1249 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1250 benchmark::utils::CheckSSE41)
1251 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1252 ->UseRealTime();
1253 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x12,
1254 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
1255 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1256 benchmark::utils::CheckSSE41)
1257 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1258 ->UseRealTime();
1259 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x16,
1260 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
1261 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1262 benchmark::utils::CheckSSE41)
1263 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1264 ->UseRealTime();
1265 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x20,
1266 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
1267 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1268 benchmark::utils::CheckSSE41)
1269 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1270 ->UseRealTime();
1271 BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x24,
1272 xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
1273 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1274 benchmark::utils::CheckSSE41)
1275 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1276 ->UseRealTime();
1277
// Registers f32_vsigmoid benchmarks for the sse2_rr2_p5_div kernels
// (x4, x8, x12, x16, x20, x24), all sharing
// xnn_init_f32_sigmoid_sse2_rr2_p5_params for parameter setup.
// No isa_check argument is passed, so f32_vsigmoid's default of nullptr
// applies and the ISA check is skipped.
// UseRealTime() makes the benchmark report wall-clock time rather than CPU
// time.
1278 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x4,
1279 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
1280 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1281 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1282 ->UseRealTime();
1283 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x8,
1284 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
1285 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1286 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1287 ->UseRealTime();
1288 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x12,
1289 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
1290 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1291 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1292 ->UseRealTime();
1293 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x16,
1294 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
1295 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1296 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1297 ->UseRealTime();
1298 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x20,
1299 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
1300 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1301 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1302 ->UseRealTime();
1303 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x24,
1304 xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
1305 xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1306 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1307 ->UseRealTime();
1308
// Registers f32_vsigmoid benchmarks for the sse2_rr2_lut64_p2_div kernels
// (x4, x8, x12, x16, x20, x24), all sharing
// xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params for parameter setup.
// No isa_check argument is passed, so f32_vsigmoid's default of nullptr
// applies and the ISA check is skipped.
// NOTE(review): Apply(UnaryElementwiseParameters<float, float>) presumably
// supplies the element counts read through state.range(0) -- confirm in
// bench/utils.h.
1309 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x4,
1310 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
1311 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1312 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1313 ->UseRealTime();
1314 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x8,
1315 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
1316 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1317 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1318 ->UseRealTime();
1319 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x12,
1320 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
1321 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1322 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1323 ->UseRealTime();
1324 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x16,
1325 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
1326 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1327 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1328 ->UseRealTime();
1329 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x20,
1330 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
1331 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1332 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1333 ->UseRealTime();
1334 BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x24,
1335 xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
1336 xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1337 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1338 ->UseRealTime();
1339 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1340
1341 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Registers f32_vsigmoid benchmarks for the wasmsimd_rr2_lut64_p2_div kernels
// (x4, x8, x12, x16, x20, x24), all sharing
// xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params for parameter setup.
// Compiled only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set
// (enclosing #if); no isa_check is passed, so f32_vsigmoid skips the ISA
// check.
1342 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x4,
1343 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
1344 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1345 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1346 ->UseRealTime();
1347 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x8,
1348 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
1349 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1350 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1351 ->UseRealTime();
1352 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x12,
1353 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
1354 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1355 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1356 ->UseRealTime();
1357 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x16,
1358 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
1359 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1360 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1361 ->UseRealTime();
1362 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x20,
1363 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
1364 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1365 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1366 ->UseRealTime();
1367 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x24,
1368 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
1369 xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1370 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1371 ->UseRealTime();
1372
// Registers f32_vsigmoid benchmarks for the wasmsimd_rr2_p5_div kernels
// (x4, x8, x12, x16, x20, x24), all sharing
// xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params for parameter setup.
// No isa_check is passed, so f32_vsigmoid's default of nullptr applies and
// the ISA check is skipped.
// UseRealTime() makes the benchmark report wall-clock time rather than CPU
// time.
1373 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x4,
1374 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
1375 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1376 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1377 ->UseRealTime();
1378 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x8,
1379 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
1380 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1381 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1382 ->UseRealTime();
1383 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x12,
1384 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
1385 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1386 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1387 ->UseRealTime();
1388 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x16,
1389 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
1390 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1391 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1392 ->UseRealTime();
1393 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x20,
1394 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
1395 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1396 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1397 ->UseRealTime();
1398 BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x24,
1399 xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
1400 xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1401 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1402 ->UseRealTime();
1403 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1404
// Registers f32_vsigmoid benchmarks for the scalar_rr2_lut2048_p1_div kernels
// (x1, x2, x4), all sharing xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params
// for parameter setup. No architecture guard is visible around these
// registrations here, and no isa_check is passed, so f32_vsigmoid skips the
// ISA check.
1405 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x1,
1406 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1,
1407 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1408 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1409 ->UseRealTime();
1410 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x2,
1411 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2,
1412 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1413 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1414 ->UseRealTime();
1415 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x4,
1416 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4,
1417 xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1418 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1419 ->UseRealTime();
1420
// Registers f32_vsigmoid benchmarks for the scalar_rr2_lut64_p2_div kernels
// (x1, x2, x4), all sharing xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params
// for parameter setup. No isa_check is passed, so f32_vsigmoid skips the ISA
// check.
1421 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x1,
1422 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1,
1423 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1424 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1425 ->UseRealTime();
1426 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x2,
1427 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
1428 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1429 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1430 ->UseRealTime();
1431 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x4,
1432 xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4,
1433 xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1434 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1435 ->UseRealTime();
1436
// Registers f32_vsigmoid benchmarks for the scalar_rr2_p5_div kernels
// (x1, x2, x4), all sharing xnn_init_f32_sigmoid_scalar_rr2_p5_params for
// parameter setup. No isa_check is passed, so f32_vsigmoid skips the ISA
// check.
// UseRealTime() makes the benchmark report wall-clock time rather than CPU
// time.
1437 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x1,
1438 xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1,
1439 xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1440 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1441 ->UseRealTime();
1442 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x2,
1443 xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2,
1444 xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1445 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1446 ->UseRealTime();
1447 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x4,
1448 xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4,
1449 xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1450 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1451 ->UseRealTime();
1452
// Emit the Google Benchmark entry point via BENCHMARK_MAIN() unless
// XNNPACK_BENCHMARK_NO_MAIN is defined.
// NOTE(review): the opt-out presumably exists so this file can be linked into
// a binary that supplies its own main() -- confirm against the build files.
1453 #ifndef XNNPACK_BENCHMARK_NO_MAIN
1454 BENCHMARK_MAIN();
1455 #endif
1456