xref: /aosp_15_r20/external/XNNPACK/test/f32-vsqrt.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/f32-vsqrt.yaml
8 //   Generator: tools/generate-vunary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18 
19 
#if XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester tester;
    tester.batch_size(4).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
  }

  // Batches that are multiples of 4: 8, 12, ..., 36 elements.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 4).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // Batches of 1 to 3 elements.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 3; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // Batches of 5 to 7 elements.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 5; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // Tester's inplace option enabled; batches 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEON_SQRT_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }
#endif  // XNN_ARCH_ARM64
65 
66 
#if XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester tester;
    tester.batch_size(8).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
  }

  // Batches that are multiples of 8: 16, 24, ..., 72 elements.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 8).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // Batches of 1 to 7 elements.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // Batches of 9 to 15 elements.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 9; n <= 15; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // Tester's inplace option enabled; batches 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEON_SQRT_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }
#endif  // XNN_ARCH_ARM64
112 
113 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(4).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
  }

  // Batches that are multiples of 4: 8, 12, ..., 36 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 4).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // Batches of 1 to 3 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 3; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // Batches of 5 to 7 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 5; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // Tester's inplace option enabled; batches 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
159 
160 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(8).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
  }

  // Batches that are multiples of 8: 16, 24, ..., 72 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 8).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // Batches of 1 to 7 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // Batches of 9 to 15 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 9; n <= 15; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // Tester's inplace option enabled; batches 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
206 
207 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 12 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(12).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
  }

  // Batches that are multiples of 12: 24, 36, ..., 108 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 12).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // Batches of 1 to 11 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 11; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // Batches of 13 to 23 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 13; n <= 23; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // Tester's inplace option enabled; batches 1, 12, 23, ..., 56.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
253 
254 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 16 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(16).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
  }

  // Batches that are multiples of 16: 32, 48, ..., 144 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 16).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // Batches of 1 to 15 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 15; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // Batches of 17 to 31 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 17; n <= 31; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // Tester's inplace option enabled; batches 1, 16, 31, ..., 76.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
300 
301 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 20 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(20).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
  }

  // Batches that are multiples of 20: 40, 60, ..., 180 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 20).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // Batches of 1 to 19 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 19; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // Batches of 21 to 39 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 21; n <= 39; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // Tester's inplace option enabled; batches 1, 20, 39, ..., 96.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
347 
348 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 24 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(24).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
  }

  // Batches that are multiples of 24: 48, 72, ..., 216 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 24).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // Batches of 1 to 23 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 23; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // Batches of 25 to 47 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 25; n <= 47; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // Tester's inplace option enabled; batches 1, 24, 47, ..., 116.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
394 
395 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 28 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_eq_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(28).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
  }

  // Batches that are multiples of 28: 56, 84, ..., 252 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_div_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 28).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // Batches of 1 to 27 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_lt_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 27; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // Batches of 29 to 55 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_gt_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 29; n <= 55; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // Tester's inplace option enabled; batches 1, 28, 55, ..., 136.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 140; n += 27) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
441 
442 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 32 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(32).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
  }

  // Batches that are multiples of 32: 64, 96, ..., 288 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 32).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // Batches of 1 to 31 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 31; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // Batches of 33 to 63 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 33; n <= 63; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // Tester's inplace option enabled; batches 1, 32, 63, ..., 156.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
488 
489 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 36 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_eq_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(36).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
  }

  // Batches that are multiples of 36: 72, 108, ..., 324 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_div_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 36).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // Batches of 1 to 35 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_lt_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 35; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // Batches of 37 to 71 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_gt_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 37; n <= 71; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // Tester's inplace option enabled; batches 1, 36, 71, ..., 176.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 180; n += 35) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
535 
536 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 40 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_eq_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(40).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
  }

  // Batches that are multiples of 40: 80, 120, ..., 360 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_div_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 40).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // Batches of 1 to 39 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_lt_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 39; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // Batches of 41 to 79 elements.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_gt_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 41; n <= 79; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // Tester's inplace option enabled; batches 1, 40, 79, ..., 196.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
582 
583 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(4).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
  }

  // Batches that are multiples of 4: 8, 12, ..., 36 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 4).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // Batches of 1 to 3 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 3; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // Batches of 5 to 7 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 5; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // Tester's inplace option enabled; batches 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
629 
630 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(8).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
  }

  // Batches that are multiples of 8: 16, 24, ..., 72 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 8).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // Batches of 1 to 7 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 7; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // Batches of 9 to 15 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 9; n <= 15; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // Tester's inplace option enabled; batches 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
676 
677 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 12 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(12).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
  }

  // Batches that are multiples of 12: 24, 36, ..., 108 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 12).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // Batches of 1 to 11 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 11; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // Batches of 13 to 23 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 13; n <= 23; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // Tester's inplace option enabled; batches 1, 12, 23, ..., 56.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
723 
724 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 16 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester tester;
    tester.batch_size(16).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
  }

  // Batches that are multiples of 16: 32, 48, ..., 144 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t multiple = 2; multiple <= 9; ++multiple) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(multiple * 16).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // Batches of 1 to 15 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 15; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // Batches of 17 to 31 elements.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 17; n <= 31; ++n) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // Tester's inplace option enabled; batches 1, 16, 31, ..., 76.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).inplace(true).Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
770 
771 
772 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 20.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(20);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
}
779 
// Exercises batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
  }
}
788 
// Exercises every batch size from 1 through 19.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
  }
}
797 
// Exercises every batch size from 21 through 39.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
  }
}
806 
// Exercises batch sizes 1, 20, 39, ..., 96 (step 19) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
  }
}
816 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
817 
818 
819 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 24.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(24);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
}
826 
// Exercises batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
  }
}
835 
// Exercises every batch size from 1 through 23.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
  }
}
844 
// Exercises every batch size from 25 through 47.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
  }
}
853 
// Exercises batch sizes 1, 24, 47, ..., 116 (step 23) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
  }
}
863 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
864 
865 
866 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 28.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_eq_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(28);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
}
873 
// Exercises batch sizes 56, 84, ..., 252 (multiples of 28).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_div_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 56; n < 280; n += 28) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
882 
// Exercises every batch size from 1 through 27.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_lt_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 28; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
891 
// Exercises every batch size from 29 through 55.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_gt_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 29; n < 56; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
900 
// Exercises batch sizes 1, 28, 55, ..., 136 (step 27) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 140; n += 27) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
910 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
911 
912 
913 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 32.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(32);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
}
920 
// Exercises batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 64; n < 320; n += 32) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
929 
// Exercises every batch size from 1 through 31.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 32; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
938 
// Exercises every batch size from 33 through 63.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 33; n < 64; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
947 
// Exercises batch sizes 1, 32, 63, ..., 156 (step 31) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
957 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
958 
959 
960 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 36.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_eq_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(36);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
}
967 
// Exercises batch sizes 72, 108, ..., 324 (multiples of 36).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_div_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 72; n < 360; n += 36) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
976 
// Exercises every batch size from 1 through 35.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_lt_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 36; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
985 
// Exercises every batch size from 37 through 71.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_gt_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 37; n < 72; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
994 
// Exercises batch sizes 1, 36, 71, ..., 176 (step 35) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 180; n += 35) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
1004 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1005 
1006 
1007 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the batch size fixed at 40.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_eq_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(40);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
}
1014 
// Exercises batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_div_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 80; n < 400; n += 40) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1023 
// Exercises every batch size from 1 through 39.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_lt_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 40; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1032 
// Exercises every batch size from 41 through 79.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_gt_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 41; n < 80; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1041 
// Exercises batch sizes 1, 40, 79, ..., 196 (step 39) with inplace(true) set on the tester.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 200; n += 39) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1051 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1052 
1053 
1054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 4.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(4);
  tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
}
1061 
// Exercises batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VSQRT__SSE_SQRT_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 8; n < 40; n += 4) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1070 
// Exercises every batch size from 1 through 3.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n < 4; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1079 
// Exercises every batch size from 5 through 7.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 5; n < 8; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1088 
// Exercises batch sizes 1, 4, 7, ..., 19 (step 3) with inplace(true) set on the tester.
TEST(F32_VSQRT__SSE_SQRT_X4, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1098 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1099 
1100 
1101 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 8.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
}
1108 
// Exercises batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSQRT__SSE_SQRT_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1117 
// Exercises every batch size from 1 through 7.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n < 8; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1126 
// Exercises every batch size from 9 through 15.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 9; n < 16; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1135 
// Exercises batch sizes 1, 8, 15, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VSQRT__SSE_SQRT_X8, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1145 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1146 
1147 
1148 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 8, passing the AVX params initializer.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
}
1155 
// Exercises batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSQRT__AVX_SQRT_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1164 
// Exercises every batch size from 1 through 7.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1173 
// Exercises every batch size from 9 through 15.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1182 
// Exercises batch sizes 1, 8, 15, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VSQRT__AVX_SQRT_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1192 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1193 
1194 
1195 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 16, passing the AVX params initializer.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(16);
  tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
}
1202 
// Exercises batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VSQRT__AVX_SQRT_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1211 
// Exercises every batch size from 1 through 15.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1220 
// Exercises every batch size from 17 through 31.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1229 
// Exercises batch sizes 1, 16, 31, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(F32_VSQRT__AVX_SQRT_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1239 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1240 
1241 
1242 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 8, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
}
1249 
// Exercises batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1258 
// Exercises every batch size from 1 through 7.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 8; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1267 
// Exercises every batch size from 9 through 15.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 9; n < 16; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1276 
// Exercises batch sizes 1, 8, 15, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1286 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1287 
1288 
1289 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 16, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(16);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
}
1296 
// Exercises batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1305 
// Exercises every batch size from 1 through 15.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 16; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1314 
// Exercises every batch size from 17 through 31.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 17; n < 32; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1323 
// Exercises batch sizes 1, 16, 31, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1333 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1334 
1335 
1336 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 24, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_eq_24) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(24);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
}
1343 
// Exercises batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_div_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1352 
// Exercises every batch size from 1 through 23.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_lt_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 24; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1361 
// Exercises every batch size from 25 through 47.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_gt_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 25; n < 48; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1370 
// Exercises batch sizes 1, 24, 47, ..., 116 (step 23) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1380 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1381 
1382 
1383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 32, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(32);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
}
1390 
// Exercises batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_div_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 64; n < 320; n += 32) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1399 
// Exercises every batch size from 1 through 31.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 32; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1408 
// Exercises every batch size from 33 through 63.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 33; n < 64; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1417 
// Exercises batch sizes 1, 32, 63, ..., 156 (step 31) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1427 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1428 
1429 
1430 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 40, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_eq_40) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(40);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
}
1437 
// Exercises batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_div_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 80; n < 400; n += 40) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1446 
// Exercises every batch size from 1 through 39.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_lt_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 40; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1455 
// Exercises every batch size from 41 through 79.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_gt_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 41; n < 80; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1464 
// Exercises batch sizes 1, 40, 79, ..., 196 (step 39) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 200; n += 39) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1474 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1475 
1476 
1477 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 48, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_eq_48) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(48);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
}
1484 
// Exercises batch sizes 96, 144, ..., 432 (multiples of 48).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_div_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 96; n < 480; n += 48) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1493 
// Exercises every batch size from 1 through 47.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_lt_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 48; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1502 
// Exercises every batch size from 49 through 95.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_gt_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 49; n < 96; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1511 
// Exercises batch sizes 1, 48, 95, ..., 236 (step 47) with inplace(true) set on the tester.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 240; n += 47) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1521 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1522 
1523 
1524 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the batch size fixed at 56, passing the FMA params initializer.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_eq_56) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(56);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
}
1531 
// Exercises batch sizes 112, 168, ..., 504 (multiples of 56).
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_div_56) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 112; n < 560; n += 56) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1540 
// Exercises every batch size from 1 through 55.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_lt_56) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 56; n++) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1549 
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56,batch_gt_56)1550   TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_gt_56) {
1551     TEST_REQUIRES_X86_FMA3;
1552     for (size_t batch_size = 57; batch_size < 112; batch_size++) {
1553       VUnaryMicrokernelTester()
1554         .batch_size(batch_size)
1555         .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
1556     }
1557   }
1558 
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56,inplace)1559   TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, inplace) {
1560     TEST_REQUIRES_X86_FMA3;
1561     for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
1562       VUnaryMicrokernelTester()
1563         .batch_size(batch_size)
1564         .inplace(true)
1565         .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
1566     }
1567   }
1568 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1569 
1570 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, driven
  // through VUnaryMicrokernelTester with xnn_init_f32_sqrt_fma_params.
  // Every case starts with TEST_REQUIRES_X86_FMA3, which is defined outside
  // this file; presumably it skips the case when the CPU lacks FMA3 -- confirm
  // in xnnpack/isa-checks.h.

  // Single run at batch size 64 (the value in the kernel's x64 suffix).
  TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_eq_64) {
    TEST_REQUIRES_X86_FMA3;
    VUnaryMicrokernelTester()
      .batch_size(64)
      .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
  }

  // Multiples of 64 from 128 through 576 (the loop stops before 640).
  TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_div_64) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
    }
  }

  // Every batch size from 1 through 63.
  TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_lt_64) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
    }
  }

  // Every batch size from 65 through 127.
  TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_gt_64) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
    }
  }

  // Batch sizes 1, 64, 127, ..., 316 (step 63, bounded by 320) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1616 
1617 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 16 (the value in the kernel's x16 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 16 from 32 through 144 (the loop stops before 160).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 (step 15, bounded by 80) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1663 
1664 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 32 (the value in the kernel's x32 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 32 from 64 through 288 (the loop stops before 320).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 31.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 33 through 63.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 32, 63, ..., 156 (step 31, bounded by 160) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1710 
1711 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 48 (the value in the kernel's x48 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_eq_48) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(48)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 48 from 96 through 432 (the loop stops before 480).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_div_48) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 47.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_lt_48) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 48; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 49 through 95.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_gt_48) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 49; batch_size < 96; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 48, 95, ..., 236 (step 47, bounded by 240) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1757 
1758 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 64 (the value in the kernel's x64 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_eq_64) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(64)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 64 from 128 through 576 (the loop stops before 640).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_div_64) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 63.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_lt_64) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 65 through 127.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_gt_64) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 64, 127, ..., 316 (step 63, bounded by 320) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1804 
1805 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 80 (the value in the kernel's x80 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_eq_80) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(80)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 80 from 160 through 720 (the loop stops before 800).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_div_80) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 160; batch_size < 800; batch_size += 80) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 79.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_lt_80) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 80; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 81 through 159.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_gt_80) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 81; batch_size < 160; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 80, 159, ..., 396 (step 79, bounded by 400) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 400; batch_size += 79) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1851 
1852 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 96 (the value in the kernel's x96 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_eq_96) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(96)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 96 from 192 through 864 (the loop stops before 960).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_div_96) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 192; batch_size < 960; batch_size += 96) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 95.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_lt_96) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 96; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 97 through 191.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_gt_96) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 97; batch_size < 192; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 96, 191, ..., 476 (step 95, bounded by 480) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 480; batch_size += 95) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1898 
1899 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 112 (the value in the kernel's x112 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_eq_112) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(112)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 112 from 224 through 1008 (the loop stops before 1120).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_div_112) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 224; batch_size < 1120; batch_size += 112) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 111.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_lt_112) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 112; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 113 through 223.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_gt_112) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 113; batch_size < 224; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 112, 223, ..., 556 (step 111, bounded by 560) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 560; batch_size += 111) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1945 
1946 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128,
  // driven through VUnaryMicrokernelTester with xnn_init_f32_sqrt_avx512_params.
  // Every case starts with TEST_REQUIRES_X86_AVX512F, which is defined outside
  // this file; presumably it skips the case when the CPU lacks AVX512F --
  // confirm in xnnpack/isa-checks.h.

  // Single run at batch size 128 (the value in the kernel's x128 suffix).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_eq_128) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester()
      .batch_size(128)
      .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
  }

  // Multiples of 128 from 256 through 1152 (the loop stops before 1280).
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_div_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 1 through 127.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_lt_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 128; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Every batch size from 129 through 255.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_gt_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 129; batch_size < 256; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
    }
  }

  // Batch sizes 1, 128, 255, ..., 636 (step 127, bounded by 640) with
  // inplace(true). NOTE(review): presumably input and output share one buffer
  // in this mode -- confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1992 
1993 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4, driven
  // through VUnaryMicrokernelTester. These cases have no TEST_REQUIRES_* check
  // and pass no params-init function to Test().

  // Single run at batch size 4 (the value in the kernel's x4 suffix).
  TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_eq_4) {
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
  }

  // Multiples of 4 from 8 through 36 (the loop stops before 40).
  TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_div_4) {
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
    }
  }

  // Every batch size from 1 through 3.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_lt_4) {
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
    }
  }

  // Every batch size from 5 through 7.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_gt_4) {
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 (step 3, bounded by 20) with inplace(true).
  // NOTE(review): presumably input and output share one buffer in this mode --
  // confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X4, inplace) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2034 
2035 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch-size coverage for xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8, driven
  // through VUnaryMicrokernelTester. These cases have no TEST_REQUIRES_* check
  // and pass no params-init function to Test().

  // Single run at batch size 8 (the value in the kernel's x8 suffix).
  TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_eq_8) {
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
  }

  // Multiples of 8 from 16 through 72 (the loop stops before 80).
  TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_div_8) {
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
    }
  }

  // Every batch size from 1 through 7.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_lt_8) {
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
    }
  }

  // Every batch size from 9 through 15.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_gt_8) {
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 (step 7, bounded by 40) with inplace(true).
  // NOTE(review): presumably input and output share one buffer in this mode --
  // confirm in vunary-microkernel-tester.h.
  TEST(F32_VSQRT__WASMSIMD_SQRT_X8, inplace) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2076 
2077 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x1 once at batch size 1. The case is
// not wrapped in an architecture guard and has no TEST_REQUIRES_* check.
TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_eq_1) {
  VUnaryMicrokernelTester()
    .batch_size(1)
    .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
}
2083 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x1 for every batch size from 2
// through 9 (the loop stops before 10).
TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_gt_1) {
  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
  }
}
2091 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x1 with inplace(true) for every batch
// size from 1 through 5.
// NOTE(review): inplace(true) presumably makes the tester use one buffer for
// both input and output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VSQRT__SCALAR_SQRT_X1, inplace) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
  }
}
2100 
2101 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x2 once at batch size 2 (the value in
// the kernel's x2 suffix).
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_eq_2) {
  VUnaryMicrokernelTester()
    .batch_size(2)
    .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
}
2107 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x2 for the even batch sizes from 4
// through 18 (the loop stops before 20).
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_div_2) {
  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2115 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x2 for batch sizes below 2. The loop
// bounds (1 <= batch_size < 2) admit only batch size 1, so the body executes
// exactly once.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_lt_2) {
  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2123 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x2 for batch sizes strictly between
// 2 and 4. The loop bounds (3 <= batch_size < 4) admit only batch size 3, so
// the body executes exactly once.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_gt_2) {
  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2131 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x2 with inplace(true) for every batch
// size from 1 through 10.
// NOTE(review): inplace(true) presumably makes the tester use one buffer for
// both input and output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VSQRT__SCALAR_SQRT_X2, inplace) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2140 
2141 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x4 once at batch size 4 (the value in
// the kernel's x4 suffix).
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_eq_4) {
  VUnaryMicrokernelTester()
    .batch_size(4)
    .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
}
2147 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x4 for multiples of 4 from 8 through
// 36 (the loop stops before 40).
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_div_4) {
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2155 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x4 for every batch size from 1
// through 3.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_lt_4) {
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2163 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x4 for every batch size from 5
// through 7.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_gt_4) {
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2171 
// Runs xnn_f32_vsqrt_ukernel__scalar_sqrt_x4 with inplace(true) for batch sizes
// 1, 4, 7, ..., 19 (step 3, bounded by 20).
// NOTE(review): inplace(true) presumably makes the tester use one buffer for
// both input and output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VSQRT__SCALAR_SQRT_X4, inplace) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2180