xref: /aosp_15_r20/external/XNNPACK/bench/f32-vsigmoid.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14 
15 #include <xnnpack.h>
16 #include <xnnpack/aligned-allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/microfnptr.h>
19 #include <xnnpack/microparams-init.h>
20 #include <xnnpack/vunary.h>
21 
22 
f32_vsigmoid(benchmark::State & state,xnn_f32_vsigmoid_ukernel_function sigmoid,xnn_init_f32_sigmoid_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)23 static void f32_vsigmoid(
24   benchmark::State& state,
25   xnn_f32_vsigmoid_ukernel_function sigmoid,
26   xnn_init_f32_sigmoid_params_fn init_params,
27   benchmark::utils::IsaCheckFunction isa_check = nullptr)
28 {
29   if (isa_check && !isa_check(state)) {
30     return;
31   }
32 
33   const size_t num_elements = state.range(0);
34 
35   std::random_device random_device;
36   auto rng = std::mt19937(random_device());
37   auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
38 
39   std::vector<float, AlignedAllocator<float, 64>> x(num_elements);
40   std::vector<float, AlignedAllocator<float, 64>> y(num_elements);
41   std::generate(x.begin(), x.end(), std::ref(f32rng));
42   std::fill(y.begin(), y.end(), std::nanf(""));
43 
44   xnn_f32_sigmoid_params params;
45   init_params(&params);
46   for (auto _ : state) {
47     sigmoid(num_elements * sizeof(float), x.data(), y.data(), &params);
48   }
49 
50   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
51   if (cpu_frequency != 0) {
52     state.counters["cpufreq"] = cpu_frequency;
53   }
54 
55   const size_t elements_per_iteration = num_elements;
56   state.counters["elements"] =
57     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
58 
59   const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
60   state.counters["bytes"] =
61     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
62 }
63 
#if XNN_ARCH_ARM64
  // Registrations compiled only when XNN_ARCH_ARM64 is set: the "div"
  // variants of the NEON-FMA sigmoid micro-kernels. None of them passes an
  // isa_check, so f32_vsigmoid runs them unconditionally.
  //
  // XNN_BENCH_F32_VSIGMOID is a block-local shorthand. For a kernel suffix S
  // it registers benchmark "f32_vsigmoid/S" for xnn_f32_vsigmoid_ukernel__S,
  // forwarding the remaining arguments to f32_vsigmoid and applying the
  // unary-elementwise parameter set with real-time measurement.
  #define XNN_BENCH_F32_VSIGMOID(ukernel, ...)                               \
    BENCHMARK_CAPTURE(f32_vsigmoid, ukernel,                                 \
                      xnn_f32_vsigmoid_ukernel__##ukernel, __VA_ARGS__)      \
      ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)    \
      ->UseRealTime()

  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x4,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x8,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x12,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x16,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x20,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_p5_div_x24,
                         xnn_init_f32_sigmoid_neonfma_rr1_p5_params);

  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x4,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x8,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x12,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x16,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x20,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut64_p2_div_x24,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);

  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x4,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x8,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x12,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x16,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x20,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  XNN_BENCH_F32_VSIGMOID(neonfma_rr1_lut2048_p1_div_x24,
                         xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);

  #undef XNN_BENCH_F32_VSIGMOID
#endif  // XNN_ARCH_ARM64
158 
159 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
160   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x4,
161                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4,
162                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
163                     benchmark::utils::CheckNEONFMA)
164     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
165     ->UseRealTime();
166   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x8,
167                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8,
168                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
169                     benchmark::utils::CheckNEONFMA)
170     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
171     ->UseRealTime();
172   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x12,
173                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12,
174                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
175                     benchmark::utils::CheckNEONFMA)
176     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
177     ->UseRealTime();
178   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x16,
179                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16,
180                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
181                     benchmark::utils::CheckNEONFMA)
182     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
183     ->UseRealTime();
184   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x20,
185                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20,
186                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
187                     benchmark::utils::CheckNEONFMA)
188     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
189     ->UseRealTime();
190   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2fma_x24,
191                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24,
192                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
193                     benchmark::utils::CheckNEONFMA)
194     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
195     ->UseRealTime();
196 
197   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x4,
198                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
199                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
200                     benchmark::utils::CheckNEONFMA)
201     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
202     ->UseRealTime();
203   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x8,
204                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
205                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
206                     benchmark::utils::CheckNEONFMA)
207     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
208     ->UseRealTime();
209   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x12,
210                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12,
211                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
212                     benchmark::utils::CheckNEONFMA)
213     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
214     ->UseRealTime();
215   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x16,
216                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16,
217                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
218                     benchmark::utils::CheckNEONFMA)
219     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
220     ->UseRealTime();
221   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x20,
222                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20,
223                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
224                     benchmark::utils::CheckNEONFMA)
225     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
226     ->UseRealTime();
227   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr1recps1fma_x24,
228                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24,
229                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
230                     benchmark::utils::CheckNEONFMA)
231     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
232     ->UseRealTime();
233 
234   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x4,
235                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4,
236                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
237                     benchmark::utils::CheckNEONFMA)
238     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
239     ->UseRealTime();
240   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x8,
241                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8,
242                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
243                     benchmark::utils::CheckNEONFMA)
244     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
245     ->UseRealTime();
246   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x12,
247                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12,
248                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
249                     benchmark::utils::CheckNEONFMA)
250     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
251     ->UseRealTime();
252   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x16,
253                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16,
254                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
255                     benchmark::utils::CheckNEONFMA)
256     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
257     ->UseRealTime();
258   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x20,
259                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20,
260                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
261                     benchmark::utils::CheckNEONFMA)
262     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
263     ->UseRealTime();
264   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_p5_nr2recps_x24,
265                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
266                     xnn_init_f32_sigmoid_neonfma_rr1_p5_params,
267                     benchmark::utils::CheckNEONFMA)
268     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
269     ->UseRealTime();
270 
271   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x4,
272                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
273                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
274                     benchmark::utils::CheckNEON)
275     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
276     ->UseRealTime();
277   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x8,
278                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
279                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
280                     benchmark::utils::CheckNEON)
281     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
282     ->UseRealTime();
283   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x12,
284                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
285                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
286                     benchmark::utils::CheckNEON)
287     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
288     ->UseRealTime();
289   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x16,
290                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
291                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
292                     benchmark::utils::CheckNEON)
293     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
294     ->UseRealTime();
295   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x20,
296                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
297                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
298                     benchmark::utils::CheckNEON)
299     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
300     ->UseRealTime();
301   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_p5_nr2recps_x24,
302                     xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
303                     xnn_init_f32_sigmoid_scalar_rr2_p5_params,
304                     benchmark::utils::CheckNEON)
305     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
306     ->UseRealTime();
307 
308   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x4,
309                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
310                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
311                     benchmark::utils::CheckNEONFMA)
312     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
313     ->UseRealTime();
314   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x8,
315                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
316                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
317                     benchmark::utils::CheckNEONFMA)
318     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
319     ->UseRealTime();
320   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x12,
321                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
322                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
323                     benchmark::utils::CheckNEONFMA)
324     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
325     ->UseRealTime();
326   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x16,
327                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
328                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
329                     benchmark::utils::CheckNEONFMA)
330     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
331     ->UseRealTime();
332   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x20,
333                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
334                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
335                     benchmark::utils::CheckNEONFMA)
336     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
337     ->UseRealTime();
338   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2fma_x24,
339                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
340                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
341                     benchmark::utils::CheckNEONFMA)
342     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
343     ->UseRealTime();
344 
345   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x4,
346                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
347                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
348                     benchmark::utils::CheckNEONFMA)
349     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
350     ->UseRealTime();
351   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x8,
352                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
353                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
354                     benchmark::utils::CheckNEONFMA)
355     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
356     ->UseRealTime();
357   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x12,
358                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
359                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
360                     benchmark::utils::CheckNEONFMA)
361     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
362     ->UseRealTime();
363   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x16,
364                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
365                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
366                     benchmark::utils::CheckNEONFMA)
367     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
368     ->UseRealTime();
369   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x20,
370                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
371                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
372                     benchmark::utils::CheckNEONFMA)
373     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
374     ->UseRealTime();
375   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr1recps1fma_x24,
376                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
377                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
378                     benchmark::utils::CheckNEONFMA)
379     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
380     ->UseRealTime();
381 
382   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x4,
383                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
384                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
385                     benchmark::utils::CheckNEONFMA)
386     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
387     ->UseRealTime();
388   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x8,
389                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
390                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
391                     benchmark::utils::CheckNEONFMA)
392     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
393     ->UseRealTime();
394   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x12,
395                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
396                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
397                     benchmark::utils::CheckNEONFMA)
398     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
399     ->UseRealTime();
400   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x16,
401                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
402                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
403                     benchmark::utils::CheckNEONFMA)
404     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
405     ->UseRealTime();
406   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x20,
407                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
408                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
409                     benchmark::utils::CheckNEONFMA)
410     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
411     ->UseRealTime();
412   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut64_p2_nr2recps_x24,
413                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
414                     xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
415                     benchmark::utils::CheckNEONFMA)
416     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
417     ->UseRealTime();
418 
419   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x4,
420                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
421                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
422                     benchmark::utils::CheckNEON)
423     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
424     ->UseRealTime();
425   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x8,
426                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
427                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
428                     benchmark::utils::CheckNEON)
429     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
430     ->UseRealTime();
431   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x12,
432                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
433                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
434                     benchmark::utils::CheckNEON)
435     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
436     ->UseRealTime();
437   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x16,
438                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
439                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
440                     benchmark::utils::CheckNEON)
441     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
442     ->UseRealTime();
443   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x20,
444                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
445                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
446                     benchmark::utils::CheckNEON)
447     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
448     ->UseRealTime();
449   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut64_p2_nr2recps_x24,
450                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
451                     xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
452                     benchmark::utils::CheckNEON)
453     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
454     ->UseRealTime();
455 
456   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x4,
457                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
458                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
459                     benchmark::utils::CheckNEONFMA)
460     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
461     ->UseRealTime();
462   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x8,
463                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
464                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
465                     benchmark::utils::CheckNEONFMA)
466     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
467     ->UseRealTime();
468   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x12,
469                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
470                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
471                     benchmark::utils::CheckNEONFMA)
472     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
473     ->UseRealTime();
474   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x16,
475                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
476                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
477                     benchmark::utils::CheckNEONFMA)
478     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
479     ->UseRealTime();
480   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x20,
481                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
482                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
483                     benchmark::utils::CheckNEONFMA)
484     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
485     ->UseRealTime();
486   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2fma_x24,
487                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
488                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
489                     benchmark::utils::CheckNEONFMA)
490     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
491     ->UseRealTime();
492 
493   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
494                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
495                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
496                     benchmark::utils::CheckNEONFMA)
497     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
498     ->UseRealTime();
499   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
500                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
501                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
502                     benchmark::utils::CheckNEONFMA)
503     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
504     ->UseRealTime();
505   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
506                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
507                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
508                     benchmark::utils::CheckNEONFMA)
509     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
510     ->UseRealTime();
511   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
512                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
513                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
514                     benchmark::utils::CheckNEONFMA)
515     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
516     ->UseRealTime();
517   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
518                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
519                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
520                     benchmark::utils::CheckNEONFMA)
521     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
522     ->UseRealTime();
523   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
524                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
525                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
526                     benchmark::utils::CheckNEONFMA)
527     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
528     ->UseRealTime();
529 
// Benchmarks for the neonfma_rr1_lut2048_p1_nr2recps sigmoid micro-kernels
// (x4 through x24 variants). All of them share the neonfma_rr1_lut2048_p1
// parameter initializer and are gated by CheckNEONFMA, which f32_vsigmoid
// consults before doing any work.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
530   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x4,
531                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
532                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
533                     benchmark::utils::CheckNEONFMA)
534     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
535     ->UseRealTime();
536   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x8,
537                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
538                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
539                     benchmark::utils::CheckNEONFMA)
540     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
541     ->UseRealTime();
542   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x12,
543                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
544                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
545                     benchmark::utils::CheckNEONFMA)
546     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
547     ->UseRealTime();
548   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x16,
549                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
550                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
551                     benchmark::utils::CheckNEONFMA)
552     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
553     ->UseRealTime();
554   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x20,
555                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
556                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
557                     benchmark::utils::CheckNEONFMA)
558     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
559     ->UseRealTime();
560   BENCHMARK_CAPTURE(f32_vsigmoid, neonfma_rr1_lut2048_p1_nr2recps_x24,
561                     xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
562                     xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params,
563                     benchmark::utils::CheckNEONFMA)
564     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
565     ->UseRealTime();
566 
// Benchmarks for the neon_rr2_lut2048_p1_nr2recps sigmoid micro-kernels
// (x4 through x24 variants), gated by CheckNEON; f32_vsigmoid returns early
// when that check fails.
// The parameter initializer must belong to the same ISA/variant family as the
// micro-kernel it is paired with, so the NEON kernels take the
// neon_rr2_lut2048_p1 initializer rather than the scalar one.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
567   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x4,
568                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
569                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
570                     benchmark::utils::CheckNEON)
571     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
572     ->UseRealTime();
573   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x8,
574                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
575                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
576                     benchmark::utils::CheckNEON)
577     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
578     ->UseRealTime();
579   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x12,
580                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
581                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
582                     benchmark::utils::CheckNEON)
583     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
584     ->UseRealTime();
585   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x16,
586                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
587                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
588                     benchmark::utils::CheckNEON)
589     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
590     ->UseRealTime();
591   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x20,
592                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
593                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
594                     benchmark::utils::CheckNEON)
595     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
596     ->UseRealTime();
597   BENCHMARK_CAPTURE(f32_vsigmoid, neon_rr2_lut2048_p1_nr2recps_x24,
598                     xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
599                     xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params,
600                     benchmark::utils::CheckNEON)
601     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
602     ->UseRealTime();
603 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
604 
605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Benchmarks for the avx512f_rr1_p5_scalef_div sigmoid micro-kernels
// (x16 through x128 variants). Note that the registered benchmark names omit
// the "rr1" component that appears in the kernel symbol names.
// Every registration uses the avx512_rr1_p5 parameter initializer and is
// gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
606   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x16,
607                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
608                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
609                     benchmark::utils::CheckAVX512F)
610     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
611     ->UseRealTime();
612   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x32,
613                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
614                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
615                     benchmark::utils::CheckAVX512F)
616     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
617     ->UseRealTime();
618   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x48,
619                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48,
620                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
621                     benchmark::utils::CheckAVX512F)
622     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
623     ->UseRealTime();
624   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x64,
625                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64,
626                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
627                     benchmark::utils::CheckAVX512F)
628     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
629     ->UseRealTime();
630   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x80,
631                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80,
632                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
633                     benchmark::utils::CheckAVX512F)
634     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
635     ->UseRealTime();
636   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x96,
637                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96,
638                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
639                     benchmark::utils::CheckAVX512F)
640     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
641     ->UseRealTime();
642   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x112,
643                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112,
644                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
645                     benchmark::utils::CheckAVX512F)
646     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
647     ->UseRealTime();
648   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_div_x128,
649                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128,
650                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
651                     benchmark::utils::CheckAVX512F)
652     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
653     ->UseRealTime();
654 
// Benchmarks for the avx512f_rr1_p5_scalef_nr1fma sigmoid micro-kernels
// (x16 through x128 variants). The registered benchmark names omit the "rr1"
// component that appears in the kernel symbol names.
// Every registration uses the avx512_rr1_p5 parameter initializer and is
// gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
655   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x16,
656                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16,
657                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
658                     benchmark::utils::CheckAVX512F)
659     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
660     ->UseRealTime();
661   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x32,
662                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32,
663                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
664                     benchmark::utils::CheckAVX512F)
665     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
666     ->UseRealTime();
667   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x48,
668                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48,
669                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
670                     benchmark::utils::CheckAVX512F)
671     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
672     ->UseRealTime();
673   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x64,
674                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64,
675                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
676                     benchmark::utils::CheckAVX512F)
677     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
678     ->UseRealTime();
679   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x80,
680                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80,
681                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
682                     benchmark::utils::CheckAVX512F)
683     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
684     ->UseRealTime();
685   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x96,
686                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96,
687                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
688                     benchmark::utils::CheckAVX512F)
689     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
690     ->UseRealTime();
691   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x112,
692                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112,
693                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
694                     benchmark::utils::CheckAVX512F)
695     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
696     ->UseRealTime();
697   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_p5_scalef_nr1fma_x128,
698                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128,
699                     xnn_init_f32_sigmoid_avx512_rr1_p5_params,
700                     benchmark::utils::CheckAVX512F)
701     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
702     ->UseRealTime();
703 
// Benchmarks for the avx512f_rr1_lut16_p3_perm_scalef_div sigmoid
// micro-kernels (x16 through x128 variants). The registered benchmark names
// omit the "rr1" component that appears in the kernel symbol names.
// Every registration uses the avx512_rr1_lut16_p3 parameter initializer and
// is gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
704   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x16,
705                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
706                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
707                     benchmark::utils::CheckAVX512F)
708     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
709     ->UseRealTime();
710   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x32,
711                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
712                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
713                     benchmark::utils::CheckAVX512F)
714     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
715     ->UseRealTime();
716   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x48,
717                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
718                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
719                     benchmark::utils::CheckAVX512F)
720     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
721     ->UseRealTime();
722   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x64,
723                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
724                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
725                     benchmark::utils::CheckAVX512F)
726     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
727     ->UseRealTime();
728   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x80,
729                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
730                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
731                     benchmark::utils::CheckAVX512F)
732     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
733     ->UseRealTime();
734   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x96,
735                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
736                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
737                     benchmark::utils::CheckAVX512F)
738     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
739     ->UseRealTime();
740   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x112,
741                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
742                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
743                     benchmark::utils::CheckAVX512F)
744     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
745     ->UseRealTime();
746   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_div_x128,
747                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
748                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
749                     benchmark::utils::CheckAVX512F)
750     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
751     ->UseRealTime();
752 
// Benchmarks for the avx512f_rr1_lut16_p3_perm_scalef_nr1fma sigmoid
// micro-kernels (x16 through x128 variants). The registered benchmark names
// omit the "rr1" component that appears in the kernel symbol names.
// Every registration uses the avx512_rr1_lut16_p3 parameter initializer and
// is gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
753   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x16,
754                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
755                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
756                     benchmark::utils::CheckAVX512F)
757     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
758     ->UseRealTime();
759   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x32,
760                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
761                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
762                     benchmark::utils::CheckAVX512F)
763     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
764     ->UseRealTime();
765   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x48,
766                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
767                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
768                     benchmark::utils::CheckAVX512F)
769     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
770     ->UseRealTime();
771   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x64,
772                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
773                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
774                     benchmark::utils::CheckAVX512F)
775     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
776     ->UseRealTime();
777   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x80,
778                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
779                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
780                     benchmark::utils::CheckAVX512F)
781     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
782     ->UseRealTime();
783   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x96,
784                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
785                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
786                     benchmark::utils::CheckAVX512F)
787     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
788     ->UseRealTime();
789   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x112,
790                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
791                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
792                     benchmark::utils::CheckAVX512F)
793     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
794     ->UseRealTime();
795   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x128,
796                     xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
797                     xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params,
798                     benchmark::utils::CheckAVX512F)
799     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
800     ->UseRealTime();
801 
// Benchmarks for the avx512f_rr2_lut32_p2_perm2_scalef_div sigmoid
// micro-kernels (x16 through x128 variants). The registered benchmark names
// omit the "rr2" component that appears in the kernel symbol names.
// Every registration uses the avx512_rr2_lut32_p2 parameter initializer and
// is gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
802   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x16,
803                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16,
804                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
805                     benchmark::utils::CheckAVX512F)
806     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
807     ->UseRealTime();
808   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x32,
809                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
810                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
811                     benchmark::utils::CheckAVX512F)
812     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
813     ->UseRealTime();
814   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x48,
815                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
816                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
817                     benchmark::utils::CheckAVX512F)
818     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
819     ->UseRealTime();
820   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x64,
821                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
822                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
823                     benchmark::utils::CheckAVX512F)
824     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
825     ->UseRealTime();
826   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x80,
827                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
828                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
829                     benchmark::utils::CheckAVX512F)
830     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
831     ->UseRealTime();
832   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x96,
833                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
834                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
835                     benchmark::utils::CheckAVX512F)
836     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
837     ->UseRealTime();
838   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x112,
839                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
840                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
841                     benchmark::utils::CheckAVX512F)
842     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
843     ->UseRealTime();
844   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_div_x128,
845                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
846                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
847                     benchmark::utils::CheckAVX512F)
848     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
849     ->UseRealTime();
850 
// Benchmarks for the avx512f_rr2_lut32_p2_perm2_scalef_nr1fma sigmoid
// micro-kernels (x16 through x128 variants). The registered benchmark names
// omit the "rr2" component that appears in the kernel symbol names.
// Every registration uses the avx512_rr2_lut32_p2 parameter initializer and
// is gated by CheckAVX512F; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
851   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x16,
852                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
853                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
854                     benchmark::utils::CheckAVX512F)
855     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
856     ->UseRealTime();
857   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x32,
858                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
859                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
860                     benchmark::utils::CheckAVX512F)
861     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
862     ->UseRealTime();
863   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x48,
864                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
865                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
866                     benchmark::utils::CheckAVX512F)
867     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
868     ->UseRealTime();
869   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x64,
870                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
871                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
872                     benchmark::utils::CheckAVX512F)
873     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
874     ->UseRealTime();
875   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x80,
876                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
877                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
878                     benchmark::utils::CheckAVX512F)
879     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
880     ->UseRealTime();
881   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x96,
882                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
883                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
884                     benchmark::utils::CheckAVX512F)
885     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
886     ->UseRealTime();
887   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x112,
888                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
889                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
890                     benchmark::utils::CheckAVX512F)
891     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
892     ->UseRealTime();
893   BENCHMARK_CAPTURE(f32_vsigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x128,
894                     xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
895                     xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
896                     benchmark::utils::CheckAVX512F)
897     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
898     ->UseRealTime();
899 
// Benchmarks for the avx2_rr1_p5_div sigmoid micro-kernels (x8 through x80
// variants). The registered benchmark names omit the "rr1" component that
// appears in the kernel symbol names.
// Every registration uses the avx2_rr1_p5 parameter initializer and is gated
// by CheckAVX2; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
900   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x8,
901                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
902                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
903                     benchmark::utils::CheckAVX2)
904     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
905     ->UseRealTime();
906   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x16,
907                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
908                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
909                     benchmark::utils::CheckAVX2)
910     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
911     ->UseRealTime();
912   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x24,
913                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
914                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
915                     benchmark::utils::CheckAVX2)
916     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
917     ->UseRealTime();
918   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x32,
919                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32,
920                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
921                     benchmark::utils::CheckAVX2)
922     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
923     ->UseRealTime();
924   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x40,
925                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40,
926                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
927                     benchmark::utils::CheckAVX2)
928     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
929     ->UseRealTime();
930   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x48,
931                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48,
932                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
933                     benchmark::utils::CheckAVX2)
934     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
935     ->UseRealTime();
936   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x56,
937                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56,
938                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
939                     benchmark::utils::CheckAVX2)
940     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
941     ->UseRealTime();
942   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x64,
943                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64,
944                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
945                     benchmark::utils::CheckAVX2)
946     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
947     ->UseRealTime();
948   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x72,
949                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72,
950                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
951                     benchmark::utils::CheckAVX2)
952     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
953     ->UseRealTime();
954   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_div_x80,
955                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80,
956                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
957                     benchmark::utils::CheckAVX2)
958     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
959     ->UseRealTime();
960 
// Benchmarks for the avx2_rr1_p5_nr1fma sigmoid micro-kernels (x8 through x80
// variants). The registered benchmark names omit the "rr1" component that
// appears in the kernel symbol names.
// Every registration uses the avx2_rr1_p5 parameter initializer and is gated
// by CheckAVX2; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
961   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x8,
962                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8,
963                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
964                     benchmark::utils::CheckAVX2)
965     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
966     ->UseRealTime();
967   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x16,
968                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16,
969                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
970                     benchmark::utils::CheckAVX2)
971     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
972     ->UseRealTime();
973   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x24,
974                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24,
975                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
976                     benchmark::utils::CheckAVX2)
977     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
978     ->UseRealTime();
979   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x32,
980                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32,
981                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
982                     benchmark::utils::CheckAVX2)
983     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
984     ->UseRealTime();
985   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x40,
986                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40,
987                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
988                     benchmark::utils::CheckAVX2)
989     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
990     ->UseRealTime();
991   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x48,
992                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48,
993                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
994                     benchmark::utils::CheckAVX2)
995     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
996     ->UseRealTime();
997   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x56,
998                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56,
999                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1000                     benchmark::utils::CheckAVX2)
1001     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1002     ->UseRealTime();
1003   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x64,
1004                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64,
1005                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1006                     benchmark::utils::CheckAVX2)
1007     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1008     ->UseRealTime();
1009   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x72,
1010                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72,
1011                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1012                     benchmark::utils::CheckAVX2)
1013     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1014     ->UseRealTime();
1015   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr1fma_x80,
1016                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80,
1017                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1018                     benchmark::utils::CheckAVX2)
1019     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1020     ->UseRealTime();
1021 
// Benchmarks for the avx2_rr1_p5_nr2fma sigmoid micro-kernels (x8 through x80
// variants). The registered benchmark names omit the "rr1" component that
// appears in the kernel symbol names.
// Every registration uses the avx2_rr1_p5 parameter initializer and is gated
// by CheckAVX2; f32_vsigmoid returns early when that check fails.
// ->Apply(UnaryElementwiseParameters) presumably supplies the element counts
// read via state.range(0) -- confirm in bench/utils.h.
// ->UseRealTime() makes the benchmark report wall-clock time.
1022   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x8,
1023                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
1024                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1025                     benchmark::utils::CheckAVX2)
1026     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1027     ->UseRealTime();
1028   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x16,
1029                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
1030                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1031                     benchmark::utils::CheckAVX2)
1032     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1033     ->UseRealTime();
1034   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x24,
1035                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
1036                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1037                     benchmark::utils::CheckAVX2)
1038     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1039     ->UseRealTime();
1040   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x32,
1041                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
1042                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1043                     benchmark::utils::CheckAVX2)
1044     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1045     ->UseRealTime();
1046   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x40,
1047                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
1048                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1049                     benchmark::utils::CheckAVX2)
1050     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1051     ->UseRealTime();
1052   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x48,
1053                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
1054                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1055                     benchmark::utils::CheckAVX2)
1056     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1057     ->UseRealTime();
1058   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x56,
1059                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
1060                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1061                     benchmark::utils::CheckAVX2)
1062     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1063     ->UseRealTime();
1064   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x64,
1065                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
1066                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1067                     benchmark::utils::CheckAVX2)
1068     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1069     ->UseRealTime();
1070   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x72,
1071                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
1072                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1073                     benchmark::utils::CheckAVX2)
1074     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1075     ->UseRealTime();
1076   BENCHMARK_CAPTURE(f32_vsigmoid, avx2_p5_nr2fma_x80,
1077                     xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
1078                     xnn_init_f32_sigmoid_avx2_rr1_p5_params,
1079                     benchmark::utils::CheckAVX2)
1080     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1081     ->UseRealTime();
1082 
// Registers f32_vsigmoid benchmarks for the avx_rr2_p5_div micro-kernels, x8 through x80.
// All ten share xnn_init_f32_sigmoid_avx_rr2_p5_params and pass benchmark::utils::CheckAVX
// as isa_check, so the driver returns early when isa_check(state) returns false.
// Apply(UnaryElementwiseParameters<float, float>) supplies the benchmark arguments; the
// driver reads the first one (state.range(0)) as the number of elements.
// NOTE(review): the labels omit the "rr2" token that the kernel symbols carry; renaming
// would change the registered benchmark names, so left as is.
1083   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x8,
1084                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
1085                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1086                     benchmark::utils::CheckAVX)
1087     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1088     ->UseRealTime();
1089   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x16,
1090                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
1091                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1092                     benchmark::utils::CheckAVX)
1093     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1094     ->UseRealTime();
1095   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x24,
1096                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
1097                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1098                     benchmark::utils::CheckAVX)
1099     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1100     ->UseRealTime();
1101   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x32,
1102                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
1103                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1104                     benchmark::utils::CheckAVX)
1105     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1106     ->UseRealTime();
1107   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x40,
1108                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
1109                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1110                     benchmark::utils::CheckAVX)
1111     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1112     ->UseRealTime();
1113   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x48,
1114                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
1115                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1116                     benchmark::utils::CheckAVX)
1117     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1118     ->UseRealTime();
1119   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x56,
1120                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
1121                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1122                     benchmark::utils::CheckAVX)
1123     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1124     ->UseRealTime();
1125   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x64,
1126                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
1127                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1128                     benchmark::utils::CheckAVX)
1129     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1130     ->UseRealTime();
1131   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x72,
1132                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
1133                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1134                     benchmark::utils::CheckAVX)
1135     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1136     ->UseRealTime();
1137   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_div_x80,
1138                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
1139                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1140                     benchmark::utils::CheckAVX)
1141     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1142     ->UseRealTime();
// Registers f32_vsigmoid benchmarks for the avx_rr2_p5_nr2 micro-kernels, x8 through x80.
// They reuse xnn_init_f32_sigmoid_avx_rr2_p5_params (the same initializer as the
// avx_rr2_p5_div kernels) and are gated by benchmark::utils::CheckAVX via isa_check.
// NOTE(review): as with the other AVX labels, "rr2" is dropped from the benchmark label
// but present in the kernel symbol; left unchanged to keep registered names stable.
1143   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x8,
1144                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
1145                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1146                     benchmark::utils::CheckAVX)
1147     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1148     ->UseRealTime();
1149   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x16,
1150                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
1151                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1152                     benchmark::utils::CheckAVX)
1153     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1154     ->UseRealTime();
1155   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x24,
1156                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
1157                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1158                     benchmark::utils::CheckAVX)
1159     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1160     ->UseRealTime();
1161   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x32,
1162                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
1163                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1164                     benchmark::utils::CheckAVX)
1165     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1166     ->UseRealTime();
1167   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x40,
1168                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
1169                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1170                     benchmark::utils::CheckAVX)
1171     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1172     ->UseRealTime();
1173   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x48,
1174                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
1175                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1176                     benchmark::utils::CheckAVX)
1177     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1178     ->UseRealTime();
1179   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x56,
1180                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
1181                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1182                     benchmark::utils::CheckAVX)
1183     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1184     ->UseRealTime();
1185   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x64,
1186                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
1187                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1188                     benchmark::utils::CheckAVX)
1189     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1190     ->UseRealTime();
1191   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x72,
1192                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
1193                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1194                     benchmark::utils::CheckAVX)
1195     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1196     ->UseRealTime();
1197   BENCHMARK_CAPTURE(f32_vsigmoid, avx_p5_nr2_x80,
1198                     xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
1199                     xnn_init_f32_sigmoid_avx_rr2_p5_params,
1200                     benchmark::utils::CheckAVX)
1201     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1202     ->UseRealTime();
1203 
// Registers f32_vsigmoid benchmarks for the sse41_rr2_p5_div micro-kernels, x4 through x24.
// The SSE4.1 kernels are paired with the SSE2 params initializer
// (xnn_init_f32_sigmoid_sse2_rr2_p5_params); only the kernel symbol and the
// benchmark::utils::CheckSSE41 isa_check differ from the sse2_rr2_p5_div registrations.
1204   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x4,
1205                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
1206                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1207                     benchmark::utils::CheckSSE41)
1208     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1209     ->UseRealTime();
1210   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x8,
1211                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
1212                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1213                     benchmark::utils::CheckSSE41)
1214     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1215     ->UseRealTime();
1216   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x12,
1217                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
1218                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1219                     benchmark::utils::CheckSSE41)
1220     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1221     ->UseRealTime();
1222   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x16,
1223                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
1224                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1225                     benchmark::utils::CheckSSE41)
1226     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1227     ->UseRealTime();
1228   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x20,
1229                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
1230                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1231                     benchmark::utils::CheckSSE41)
1232     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1233     ->UseRealTime();
1234   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_p5_div_x24,
1235                     xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
1236                     xnn_init_f32_sigmoid_sse2_rr2_p5_params,
1237                     benchmark::utils::CheckSSE41)
1238     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1239     ->UseRealTime();
1240 
// Registers f32_vsigmoid benchmarks for the sse41_rr2_lut64_p2_div micro-kernels, x4 through x24.
// Params come from the SSE2 initializer xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params;
// benchmark::utils::CheckSSE41 is the isa_check, so the driver returns early when it
// returns false.
1241   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x4,
1242                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
1243                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1244                     benchmark::utils::CheckSSE41)
1245     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1246     ->UseRealTime();
1247   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x8,
1248                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
1249                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1250                     benchmark::utils::CheckSSE41)
1251     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1252     ->UseRealTime();
1253   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x12,
1254                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
1255                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1256                     benchmark::utils::CheckSSE41)
1257     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1258     ->UseRealTime();
1259   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x16,
1260                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
1261                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1262                     benchmark::utils::CheckSSE41)
1263     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1264     ->UseRealTime();
1265   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x20,
1266                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
1267                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1268                     benchmark::utils::CheckSSE41)
1269     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1270     ->UseRealTime();
1271   BENCHMARK_CAPTURE(f32_vsigmoid, sse41_rr2_lut64_p2_div_x24,
1272                     xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
1273                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
1274                     benchmark::utils::CheckSSE41)
1275     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1276     ->UseRealTime();
1277 
// Registers f32_vsigmoid benchmarks for the sse2_rr2_p5_div micro-kernels, x4 through x24.
// No ISA check is passed, so isa_check keeps its nullptr default and the driver never
// skips these.
// NOTE(review): presumably SSE2 is treated as baseline for this x86-only section — confirm.
1278   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x4,
1279                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
1280                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1281     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1282     ->UseRealTime();
1283   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x8,
1284                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
1285                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1286     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1287     ->UseRealTime();
1288   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x12,
1289                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
1290                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1291     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1292     ->UseRealTime();
1293   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x16,
1294                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
1295                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1296     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1297     ->UseRealTime();
1298   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x20,
1299                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
1300                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1301     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1302     ->UseRealTime();
1303   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_p5_div_x24,
1304                     xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
1305                     xnn_init_f32_sigmoid_sse2_rr2_p5_params)
1306     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1307     ->UseRealTime();
1308 
// Registers f32_vsigmoid benchmarks for the sse2_rr2_lut64_p2_div micro-kernels, x4 through x24.
// Like the other SSE2 registrations, these pass no isa_check (it defaults to nullptr), so
// the driver always runs the kernel. These are the last registrations before the x86 #endif.
1309   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x4,
1310                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
1311                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1312     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1313     ->UseRealTime();
1314   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x8,
1315                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
1316                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1317     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1318     ->UseRealTime();
1319   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x12,
1320                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
1321                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1322     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1323     ->UseRealTime();
1324   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x16,
1325                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
1326                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1327     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1328     ->UseRealTime();
1329   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x20,
1330                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
1331                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1332     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1333     ->UseRealTime();
1334   BENCHMARK_CAPTURE(f32_vsigmoid, sse2_rr2_lut64_p2_div_x24,
1335                     xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
1336                     xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params)
1337     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1338     ->UseRealTime();
1339 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1340 
1341 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Registers f32_vsigmoid benchmarks for the wasmsimd_rr2_lut64_p2_div micro-kernels, x4 through x24.
// Compiled only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set (enclosing #if).
// No isa_check is passed, so the driver runs these unconditionally.
1342   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x4,
1343                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
1344                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1345     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1346     ->UseRealTime();
1347   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x8,
1348                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
1349                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1350     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1351     ->UseRealTime();
1352   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x12,
1353                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
1354                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1355     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1356     ->UseRealTime();
1357   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x16,
1358                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
1359                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1360     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1361     ->UseRealTime();
1362   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x20,
1363                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
1364                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1365     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1366     ->UseRealTime();
1367   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_lut64_p2_div_x24,
1368                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
1369                     xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params)
1370     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1371     ->UseRealTime();
1372 
// Registers f32_vsigmoid benchmarks for the wasmsimd_rr2_p5_div micro-kernels, x4 through x24.
// Each uses xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params and passes no isa_check, so the
// driver runs them unconditionally inside the WAsm SIMD #if block.
1373   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x4,
1374                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
1375                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1376     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1377     ->UseRealTime();
1378   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x8,
1379                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
1380                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1381     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1382     ->UseRealTime();
1383   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x12,
1384                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
1385                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1386     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1387     ->UseRealTime();
1388   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x16,
1389                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
1390                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1391     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1392     ->UseRealTime();
1393   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x20,
1394                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
1395                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1396     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1397     ->UseRealTime();
1398   BENCHMARK_CAPTURE(f32_vsigmoid, wasmsimd_rr2_p5_div_x24,
1399                     xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
1400                     xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params)
1401     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1402     ->UseRealTime();
1403 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1404 
// Registers f32_vsigmoid benchmarks for the scalar_rr2_lut2048_p1_div micro-kernels (x1, x2, x4).
// These follow the #endif of the WAsm SIMD block and are not inside any architecture #if
// visible in this part of the file; no isa_check is passed, so the driver never skips them.
1405 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x1,
1406                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1,
1407                   xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1408   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1409   ->UseRealTime();
1410 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x2,
1411                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2,
1412                   xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1413   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1414   ->UseRealTime();
1415 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut2048_p1_div_x4,
1416                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4,
1417                   xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params)
1418   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1419   ->UseRealTime();
1420 
// Registers f32_vsigmoid benchmarks for the scalar_rr2_lut64_p2_div micro-kernels (x1, x2, x4).
// All three share xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params and pass no isa_check.
1421 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x1,
1422                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1,
1423                   xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1424   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1425   ->UseRealTime();
1426 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x2,
1427                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
1428                   xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1429   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1430   ->UseRealTime();
1431 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_lut64_p2_div_x4,
1432                   xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4,
1433                   xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params)
1434   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1435   ->UseRealTime();
1436 
// Registers f32_vsigmoid benchmarks for the scalar_rr2_p5_div micro-kernels (x1, x2, x4).
// All three share xnn_init_f32_sigmoid_scalar_rr2_p5_params and pass no isa_check, so the
// driver always runs them.
1437 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x1,
1438                   xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1,
1439                   xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1440   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1441   ->UseRealTime();
1442 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x2,
1443                   xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2,
1444                   xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1445   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1446   ->UseRealTime();
1447 BENCHMARK_CAPTURE(f32_vsigmoid, scalar_rr2_p5_div_x4,
1448                   xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4,
1449                   xnn_init_f32_sigmoid_scalar_rr2_p5_params)
1450   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
1451   ->UseRealTime();
1452 
// Expands Google Benchmark's BENCHMARK_MAIN() (which provides main()) unless the build
// defines XNNPACK_BENCHMARK_NO_MAIN.
// NOTE(review): presumably that macro is set when this file is linked into a binary that
// supplies its own main — confirm against the build files.
1453 #ifndef XNNPACK_BENCHMARK_NO_MAIN
1454 BENCHMARK_MAIN();
1455 #endif
1456