// xref: /aosp_15_r20/external/XNNPACK/test/f32-vhswish.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-vhswish.yaml
//   Generator: tools/generate-vunary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18 
19 
// Suite F32_VHSWISH__NEON_X4: runs xnn_f32_vhswish_ukernel__neon_x4 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every test begins with
// TEST_REQUIRES_ARM_NEON (presumably skips when the CPU lacks NEON -- confirm
// in xnnpack/isa-checks.h).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_VHSWISH__NEON_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vhswish_ukernel__neon_x4, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VHSWISH__NEON_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 3 (below 4).
  TEST(F32_VHSWISH__NEON_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 5 through 7 (above 4, below 8).
  TEST(F32_VHSWISH__NEON_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__NEON_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__neon_x4, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
65 
66 
// Suite F32_VHSWISH__NEON_X8: runs xnn_f32_vhswish_ukernel__neon_x8 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every test begins with
// TEST_REQUIRES_ARM_NEON (presumably skips when the CPU lacks NEON -- confirm
// in xnnpack/isa-checks.h).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_VHSWISH__NEON_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vhswish_ukernel__neon_x8, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VHSWISH__NEON_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x8, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 7 (below 8).
  TEST(F32_VHSWISH__NEON_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x8, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 9 through 15 (above 8, below 16).
  TEST(F32_VHSWISH__NEON_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x8, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__NEON_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__neon_x8, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
112 
113 
// Suite F32_VHSWISH__NEON_X16: runs xnn_f32_vhswish_ukernel__neon_x16 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is set. Every test begins with
// TEST_REQUIRES_ARM_NEON (presumably skips when the CPU lacks NEON -- confirm
// in xnnpack/isa-checks.h).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 16 elements.
  TEST(F32_VHSWISH__NEON_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vhswish_ukernel__neon_x16, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VHSWISH__NEON_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x16, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 15 (below 16).
  TEST(F32_VHSWISH__NEON_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x16, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 17 through 31 (above 16, below 32).
  TEST(F32_VHSWISH__NEON_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__neon_x16, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__NEON_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__neon_x16, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
159 
160 
// Suite F32_VHSWISH__SSE_X4: runs xnn_f32_vhswish_ukernel__sse_x4 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_sse_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_SSE (presumably skips when the CPU lacks SSE --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 4 elements.
  TEST(F32_VHSWISH__SSE_X4, batch_eq_4) {
    TEST_REQUIRES_X86_SSE;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vhswish_ukernel__sse_x4, xnn_init_f32_hswish_sse_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VHSWISH__SSE_X4, batch_div_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x4, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 1 through 3 (below 4).
  TEST(F32_VHSWISH__SSE_X4, batch_lt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x4, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 5 through 7 (above 4, below 8).
  TEST(F32_VHSWISH__SSE_X4, batch_gt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x4, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__SSE_X4, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__sse_x4, xnn_init_f32_hswish_sse_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
206 
207 
// Suite F32_VHSWISH__SSE_X8: runs xnn_f32_vhswish_ukernel__sse_x8 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_sse_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_SSE (presumably skips when the CPU lacks SSE --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_VHSWISH__SSE_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vhswish_ukernel__sse_x8, xnn_init_f32_hswish_sse_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VHSWISH__SSE_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x8, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 1 through 7 (below 8).
  TEST(F32_VHSWISH__SSE_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x8, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 9 through 15 (above 8, below 16).
  TEST(F32_VHSWISH__SSE_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__sse_x8, xnn_init_f32_hswish_sse_params);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__SSE_X8, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__sse_x8, xnn_init_f32_hswish_sse_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
253 
254 
// Suite F32_VHSWISH__AVX_X8: runs xnn_f32_vhswish_ukernel__avx_x8 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_avx_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_AVX (presumably skips when the CPU lacks AVX --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_VHSWISH__AVX_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vhswish_ukernel__avx_x8, xnn_init_f32_hswish_avx_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VHSWISH__AVX_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1 through 7 (below 8).
  TEST(F32_VHSWISH__AVX_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 9 through 15 (above 8, below 16).
  TEST(F32_VHSWISH__AVX_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__AVX_X8, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__avx_x8, xnn_init_f32_hswish_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
300 
301 
// Suite F32_VHSWISH__AVX_X16: runs xnn_f32_vhswish_ukernel__avx_x16 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_avx_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_AVX (presumably skips when the CPU lacks AVX --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_VHSWISH__AVX_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vhswish_ukernel__avx_x16, xnn_init_f32_hswish_avx_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VHSWISH__AVX_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1 through 15 (below 16).
  TEST(F32_VHSWISH__AVX_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 17 through 31 (above 16, below 32).
  TEST(F32_VHSWISH__AVX_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__AVX_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__avx_x16, xnn_init_f32_hswish_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
347 
348 
// Suite F32_VHSWISH__FMA3_X8: runs xnn_f32_vhswish_ukernel__fma3_x8 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_avx_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_FMA3 (presumably skips when the CPU lacks FMA3 --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_VHSWISH__FMA3_X8, batch_eq_8) {
    TEST_REQUIRES_X86_FMA3;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vhswish_ukernel__fma3_x8, xnn_init_f32_hswish_avx_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VHSWISH__FMA3_X8, batch_div_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1 through 7 (below 8).
  TEST(F32_VHSWISH__FMA3_X8, batch_lt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 9 through 15 (above 8, below 16).
  TEST(F32_VHSWISH__FMA3_X8, batch_gt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x8, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__FMA3_X8, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__fma3_x8, xnn_init_f32_hswish_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
394 
395 
// Suite F32_VHSWISH__FMA3_X16: runs xnn_f32_vhswish_ukernel__fma3_x16 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_avx_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_FMA3 (presumably skips when the CPU lacks FMA3 --
// confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_VHSWISH__FMA3_X16, batch_eq_16) {
    TEST_REQUIRES_X86_FMA3;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vhswish_ukernel__fma3_x16, xnn_init_f32_hswish_avx_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VHSWISH__FMA3_X16, batch_div_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1 through 15 (below 16).
  TEST(F32_VHSWISH__FMA3_X16, batch_lt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 17 through 31 (above 16, below 32).
  TEST(F32_VHSWISH__FMA3_X16, batch_gt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__fma3_x16, xnn_init_f32_hswish_avx_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__FMA3_X16, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__fma3_x16, xnn_init_f32_hswish_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
441 
442 
// Suite F32_VHSWISH__AVX512F_X16: runs xnn_f32_vhswish_ukernel__avx512f_x16
// through VUnaryMicrokernelTester with xnn_init_f32_hswish_avx512_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_AVX512F (presumably skips when the CPU lacks AVX512F
// -- confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_VHSWISH__AVX512F_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vhswish_ukernel__avx512f_x16, xnn_init_f32_hswish_avx512_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VHSWISH__AVX512F_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x16, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 1 through 15 (below 16).
  TEST(F32_VHSWISH__AVX512F_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x16, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 17 through 31 (above 16, below 32).
  TEST(F32_VHSWISH__AVX512F_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x16, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__AVX512F_X16, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x16, xnn_init_f32_hswish_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
488 
489 
// Suite F32_VHSWISH__AVX512F_X32: runs xnn_f32_vhswish_ukernel__avx512f_x32
// through VUnaryMicrokernelTester with xnn_init_f32_hswish_avx512_params.
// Built only when XNN_ARCH_X86 or XNN_ARCH_X86_64 is set. Every test begins
// with TEST_REQUIRES_X86_AVX512F (presumably skips when the CPU lacks AVX512F
// -- confirm in xnnpack/isa-checks.h).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 32 elements.
  TEST(F32_VHSWISH__AVX512F_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_vhswish_ukernel__avx512f_x32, xnn_init_f32_hswish_avx512_params);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32).
  TEST(F32_VHSWISH__AVX512F_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x32, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 1 through 31 (below 32).
  TEST(F32_VHSWISH__AVX512F_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x32, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 33 through 63 (above 32, below 64).
  TEST(F32_VHSWISH__AVX512F_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x32, xnn_init_f32_hswish_avx512_params);
    }
  }

  // Batch sizes 1, 32, 63, ..., 156 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__AVX512F_X32, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__avx512f_x32, xnn_init_f32_hswish_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
535 
536 
// Suite F32_VHSWISH__WASMSIMD_X4: runs xnn_f32_vhswish_ukernel__wasmsimd_x4
// through VUnaryMicrokernelTester with xnn_init_f32_hswish_wasmsimd_params.
// Built only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set; these
// tests perform no run-time ISA check.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 4 elements.
  TEST(F32_VHSWISH__WASMSIMD_X4, batch_eq_4) {
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vhswish_ukernel__wasmsimd_x4, xnn_init_f32_hswish_wasmsimd_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VHSWISH__WASMSIMD_X4, batch_div_4) {
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x4, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1 through 3 (below 4).
  TEST(F32_VHSWISH__WASMSIMD_X4, batch_lt_4) {
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x4, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 5 through 7 (above 4, below 8).
  TEST(F32_VHSWISH__WASMSIMD_X4, batch_gt_4) {
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x4, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASMSIMD_X4, inplace) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x4, xnn_init_f32_hswish_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
577 
578 
// Suite F32_VHSWISH__WASMSIMD_X8: runs xnn_f32_vhswish_ukernel__wasmsimd_x8
// through VUnaryMicrokernelTester with xnn_init_f32_hswish_wasmsimd_params.
// Built only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set; these
// tests perform no run-time ISA check.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 8 elements.
  TEST(F32_VHSWISH__WASMSIMD_X8, batch_eq_8) {
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vhswish_ukernel__wasmsimd_x8, xnn_init_f32_hswish_wasmsimd_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VHSWISH__WASMSIMD_X8, batch_div_8) {
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x8, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1 through 7 (below 8).
  TEST(F32_VHSWISH__WASMSIMD_X8, batch_lt_8) {
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x8, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 9 through 15 (above 8, below 16).
  TEST(F32_VHSWISH__WASMSIMD_X8, batch_gt_8) {
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x8, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASMSIMD_X8, inplace) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x8, xnn_init_f32_hswish_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
619 
620 
// Suite F32_VHSWISH__WASMSIMD_X16: runs xnn_f32_vhswish_ukernel__wasmsimd_x16
// through VUnaryMicrokernelTester with xnn_init_f32_hswish_wasmsimd_params.
// Built only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set; these
// tests perform no run-time ISA check.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 16 elements.
  TEST(F32_VHSWISH__WASMSIMD_X16, batch_eq_16) {
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vhswish_ukernel__wasmsimd_x16, xnn_init_f32_hswish_wasmsimd_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VHSWISH__WASMSIMD_X16, batch_div_16) {
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x16, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1 through 15 (below 16).
  TEST(F32_VHSWISH__WASMSIMD_X16, batch_lt_16) {
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x16, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 17 through 31 (above 16, below 32).
  TEST(F32_VHSWISH__WASMSIMD_X16, batch_gt_16) {
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x16, xnn_init_f32_hswish_wasmsimd_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASMSIMD_X16, inplace) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasmsimd_x16, xnn_init_f32_hswish_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
661 
662 
// Suite F32_VHSWISH__WASM_X1: runs xnn_f32_vhswish_ukernel__wasm_x1 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_WASM, XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD
// is set; these tests perform no run-time ISA check.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 1 element.
  TEST(F32_VHSWISH__WASM_X1, batch_eq_1) {
    VUnaryMicrokernelTester()
      .batch_size(1)
      .Test(xnn_f32_vhswish_ukernel__wasm_x1, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 2 through 9.
  TEST(F32_VHSWISH__WASM_X1, batch_gt_1) {
    for (size_t batch_size = 2; batch_size < 10; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x1, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 5 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASM_X1, inplace) {
    for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasm_x1, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
687 
688 
// Suite F32_VHSWISH__WASM_X2: runs xnn_f32_vhswish_ukernel__wasm_x2 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_WASM, XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD
// is set; these tests perform no run-time ISA check.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 2 elements.
  TEST(F32_VHSWISH__WASM_X2, batch_eq_2) {
    VUnaryMicrokernelTester()
      .batch_size(2)
      .Test(xnn_f32_vhswish_ukernel__wasm_x2, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 4, 6, ..., 18 (multiples of 2).
  TEST(F32_VHSWISH__WASM_X2, batch_div_2) {
    for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x2, xnn_init_f32_hswish_scalar_params);
    }
  }

  // The loop bounds admit a single batch size: 1.
  TEST(F32_VHSWISH__WASM_X2, batch_lt_2) {
    for (size_t batch_size = 1; batch_size < 2; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x2, xnn_init_f32_hswish_scalar_params);
    }
  }

  // The loop bounds admit a single batch size: 3.
  TEST(F32_VHSWISH__WASM_X2, batch_gt_2) {
    for (size_t batch_size = 3; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x2, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 10 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASM_X2, inplace) {
    for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasm_x2, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
729 
730 
// Suite F32_VHSWISH__WASM_X4: runs xnn_f32_vhswish_ukernel__wasm_x4 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Built only when XNN_ARCH_WASM, XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD
// is set; these tests perform no run-time ISA check.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch of exactly 4 elements.
  TEST(F32_VHSWISH__WASM_X4, batch_eq_4) {
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vhswish_ukernel__wasm_x4, xnn_init_f32_hswish_scalar_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VHSWISH__WASM_X4, batch_div_4) {
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1 through 3 (below 4).
  TEST(F32_VHSWISH__WASM_X4, batch_lt_4) {
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 5 through 7 (above 4, below 8).
  TEST(F32_VHSWISH__WASM_X4, batch_gt_4) {
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vhswish_ukernel__wasm_x4, xnn_init_f32_hswish_scalar_params);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option enabled.
  TEST(F32_VHSWISH__WASM_X4, inplace) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vhswish_ukernel__wasm_x4, xnn_init_f32_hswish_scalar_params);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
771 
772 
// Suite F32_VHSWISH__SCALAR_X1: runs xnn_f32_vhswish_ukernel__scalar_x1 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Not wrapped in any architecture guard and performs no run-time ISA check.

// Batch of exactly 1 element.
TEST(F32_VHSWISH__SCALAR_X1, batch_eq_1) {
  VUnaryMicrokernelTester()
    .batch_size(1)
    .Test(xnn_f32_vhswish_ukernel__scalar_x1, xnn_init_f32_hswish_scalar_params);
}

// Batch sizes 2 through 9.
TEST(F32_VHSWISH__SCALAR_X1, batch_gt_1) {
  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x1, xnn_init_f32_hswish_scalar_params);
  }
}

// Batch sizes 1 through 5 with the tester's inplace option enabled.
TEST(F32_VHSWISH__SCALAR_X1, inplace) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vhswish_ukernel__scalar_x1, xnn_init_f32_hswish_scalar_params);
  }
}
795 
796 
// Suite F32_VHSWISH__SCALAR_X2: runs xnn_f32_vhswish_ukernel__scalar_x2 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Not wrapped in any architecture guard and performs no run-time ISA check.

// Batch of exactly 2 elements.
TEST(F32_VHSWISH__SCALAR_X2, batch_eq_2) {
  VUnaryMicrokernelTester()
    .batch_size(2)
    .Test(xnn_f32_vhswish_ukernel__scalar_x2, xnn_init_f32_hswish_scalar_params);
}

// Batch sizes 4, 6, ..., 18 (multiples of 2).
TEST(F32_VHSWISH__SCALAR_X2, batch_div_2) {
  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x2, xnn_init_f32_hswish_scalar_params);
  }
}

// The loop bounds admit a single batch size: 1.
TEST(F32_VHSWISH__SCALAR_X2, batch_lt_2) {
  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x2, xnn_init_f32_hswish_scalar_params);
  }
}

// The loop bounds admit a single batch size: 3.
TEST(F32_VHSWISH__SCALAR_X2, batch_gt_2) {
  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x2, xnn_init_f32_hswish_scalar_params);
  }
}

// Batch sizes 1 through 10 with the tester's inplace option enabled.
TEST(F32_VHSWISH__SCALAR_X2, inplace) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vhswish_ukernel__scalar_x2, xnn_init_f32_hswish_scalar_params);
  }
}
835 
836 
// Suite F32_VHSWISH__SCALAR_X4: runs xnn_f32_vhswish_ukernel__scalar_x4 through
// VUnaryMicrokernelTester with xnn_init_f32_hswish_scalar_params.
// Not wrapped in any architecture guard and performs no run-time ISA check.

// Batch of exactly 4 elements.
TEST(F32_VHSWISH__SCALAR_X4, batch_eq_4) {
  VUnaryMicrokernelTester()
    .batch_size(4)
    .Test(xnn_f32_vhswish_ukernel__scalar_x4, xnn_init_f32_hswish_scalar_params);
}

// Batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VHSWISH__SCALAR_X4, batch_div_4) {
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x4, xnn_init_f32_hswish_scalar_params);
  }
}

// Batch sizes 1 through 3 (below 4).
TEST(F32_VHSWISH__SCALAR_X4, batch_lt_4) {
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x4, xnn_init_f32_hswish_scalar_params);
  }
}

// Batch sizes 5 through 7 (above 4, below 8).
TEST(F32_VHSWISH__SCALAR_X4, batch_gt_4) {
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vhswish_ukernel__scalar_x4, xnn_init_f32_hswish_scalar_params);
  }
}

// Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option enabled.
TEST(F32_VHSWISH__SCALAR_X4, inplace) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vhswish_ukernel__scalar_x4, xnn_init_f32_hswish_scalar_params);
  }
}
875