// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-vsqrt.yaml
// Generator: tools/generate-vunary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18
19
#if XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neon_sqrt_x4, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 4, the number in the kernel's _x4 suffix.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
  }

  // Multiples of 4: batch_size = 8, 12, ..., 36.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // batch_size below 4: 1 through 3.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // batch_size strictly between 4 and 8: 5 through 7.
  TEST(F32_VSQRT__NEON_SQRT_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEON_SQRT_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x4);
    }
  }
#endif  // XNN_ARCH_ARM64
65
66
#if XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neon_sqrt_x8, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 8, the number in the kernel's _x8 suffix.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
  }

  // Multiples of 8: batch_size = 16, 24, ..., 72.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // batch_size below 8: 1 through 7.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // batch_size strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRT__NEON_SQRT_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEON_SQRT_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neon_sqrt_x8);
    }
  }
#endif  // XNN_ARCH_ARM64
112
113
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 4, the number in the kernel's _x4 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
  }

  // Multiples of 4: batch_size = 8, 12, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // batch_size below 4: 1 through 3.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // batch_size strictly between 4 and 8: 5 through 7.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X4, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
159
160
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 8, the number in the kernel's _x8 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
  }

  // Multiples of 8: batch_size = 16, 24, ..., 72.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // batch_size below 8: 1 through 7.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // batch_size strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
206
207
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 12, the number in the kernel's _x12 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
  }

  // Multiples of 12: batch_size = 24, 36, ..., 108.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // batch_size below 12: 1 through 11.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // batch_size strictly between 12 and 24: 13 through 23.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 12, 23, ..., 56.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X12, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
253
254
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 16, the number in the kernel's _x16 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
  }

  // Multiples of 16: batch_size = 32, 48, ..., 144.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // batch_size below 16: 1 through 15.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // batch_size strictly between 16 and 32: 17 through 31.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 16, 31, ..., 76.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
300
301
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 20, the number in the kernel's _x20 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(20)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
  }

  // Multiples of 20: batch_size = 40, 60, ..., 180.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // batch_size below 20: 1 through 19.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 20; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // batch_size strictly between 20 and 40: 21 through 39.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 21; batch_size < 40; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 20, 39, ..., 96.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X20, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
347
348
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 24, the number in the kernel's _x24 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(24)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
  }

  // Multiples of 24: batch_size = 48, 72, ..., 216.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // batch_size below 24: 1 through 23.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // batch_size strictly between 24 and 48: 25 through 47.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 24, 47, ..., 116.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X24, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
394
395
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 28, the number in the kernel's _x28 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_eq_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(28)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
  }

  // Multiples of 28: batch_size = 56, 84, ..., 252.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_div_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 56; batch_size < 280; batch_size += 28) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // batch_size below 28: 1 through 27.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_lt_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 28; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // batch_size strictly between 28 and 56: 29 through 55.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, batch_gt_28) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 29; batch_size < 56; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 28, 55, ..., 136.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X28, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 140; batch_size += 27) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
441
442
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 32, the number in the kernel's _x32 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
  }

  // Multiples of 32: batch_size = 64, 96, ..., 288.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // batch_size below 32: 1 through 31.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // batch_size strictly between 32 and 64: 33 through 63.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 32, 63, ..., 156.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X32, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
488
489
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 36, the number in the kernel's _x36 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_eq_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(36)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
  }

  // Multiples of 36: batch_size = 72, 108, ..., 324.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_div_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 72; batch_size < 360; batch_size += 36) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // batch_size below 36: 1 through 35.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_lt_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 36; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // batch_size strictly between 36 and 72: 37 through 71.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, batch_gt_36) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 37; batch_size < 72; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 36, 71, ..., 176.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X36, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 180; batch_size += 35) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
535
536
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40, run
  // through VUnaryMicrokernelTester. Every test begins with
  // TEST_REQUIRES_ARM_NEON_FMA (presumably a runtime CPU-feature guard from
  // xnnpack/isa-checks.h).

  // Single run with batch_size == 40, the number in the kernel's _x40 suffix.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_eq_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(40)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
  }

  // Multiples of 40: batch_size = 80, 120, ..., 360.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_div_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // batch_size below 40: 1 through 39.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_lt_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 40; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // batch_size strictly between 40 and 80: 41 through 79.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, batch_gt_40) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 41; batch_size < 80; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 40, 79, ..., 196.
  TEST(F32_VSQRT__NEONFMA_NR1RSQRTS1FMA1ADJ_X40, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
582
583
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 4, the number in the kernel's _x4 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
  }

  // Multiples of 4: batch_size = 8, 12, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // batch_size below 4: 1 through 3.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // batch_size strictly between 4 and 8: 5 through 7.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 4, 7, ..., 19.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X4, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
629
630
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 8, the number in the kernel's _x8 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
  }

  // Multiples of 8: batch_size = 16, 24, ..., 72.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // batch_size below 8: 1 through 7.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // batch_size strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 8, 15, ..., 36.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
676
677
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 12, the number in the kernel's _x12 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
  }

  // Multiples of 12: batch_size = 24, 36, ..., 108.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // batch_size below 12: 1 through 11.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // batch_size strictly between 12 and 24: 13 through 23.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 12, 23, ..., 56.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X12, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
723
724
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 16, the number in the kernel's _x16 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
  }

  // Multiples of 16: batch_size = 32, 48, ..., 144.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // batch_size below 16: 1 through 15.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // batch_size strictly between 16 and 32: 17 through 31.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 16, 31, ..., 76.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
770
771
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 20, the number in the kernel's _x20 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(20)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
  }

  // Multiples of 20: batch_size = 40, 60, ..., 180.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
    }
  }

  // batch_size below 20: 1 through 19.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 20; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
    }
  }

  // batch_size strictly between 20 and 40: 21 through 39.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 21; batch_size < 40; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 20, 39, ..., 96.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X20, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
817
818
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24, run through
  // VUnaryMicrokernelTester. Every test begins with TEST_REQUIRES_ARM_NEON_FMA
  // (presumably a runtime CPU-feature guard from xnnpack/isa-checks.h).

  // Single run with batch_size == 24, the number in the kernel's _x24 suffix.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester()
      .batch_size(24)
      .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
  }

  // Multiples of 24: batch_size = 48, 72, ..., 216.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
    }
  }

  // batch_size below 24: 1 through 23.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
    }
  }

  // batch_size strictly between 24 and 48: 25 through 47.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
    }
  }

  // Tester configured with inplace(true); batch_size = 1, 24, 47, ..., 116.
  TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X24, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
864
865
866 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One run with batch_size set to 28, the number in the kernel's x28 suffix.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_eq_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(28);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
}
873
// Multiples of 28 from 56 up to, but not including, 280; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_div_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 56; n < 280; n += 28) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
882
// Every batch size n with 1 <= n < 28; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_lt_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 28; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
891
// Every batch size n with 29 <= n < 56; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, batch_gt_28) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 29; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
900
// Steps n from 1 by 27 while n <= 140; each tester has inplace(true) set.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X28, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 140; n += 27) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28);
  }
}
910 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
911
912
913 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One run with batch_size set to 32, the number in the kernel's x32 suffix.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
}
920
// Multiples of 32 from 64 up to, but not including, 320; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
929
// Every batch size n with 1 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
938
// Every batch size n with 33 <= n < 64; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
947
// Steps n from 1 by 31 while n <= 160; each tester has inplace(true) set.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X32, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32);
  }
}
957 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
958
959
960 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One run with batch_size set to 36, the number in the kernel's x36 suffix.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_eq_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(36);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
}
967
// Multiples of 36 from 72 up to, but not including, 360; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_div_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 72; n < 360; n += 36) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
976
// Every batch size n with 1 <= n < 36; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_lt_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 36; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
985
// Every batch size n with 37 <= n < 72; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, batch_gt_36) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 37; n < 72; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
994
// Steps n from 1 by 35 while n <= 180; each tester has inplace(true) set.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X36, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 180; n += 35) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36);
  }
}
1004 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1005
1006
1007 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One run with batch_size set to 40, the number in the kernel's x40 suffix.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_eq_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
}
1014
// Multiples of 40 from 80 up to, but not including, 400; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_div_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1023
// Every batch size n with 1 <= n < 40; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_lt_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1032
// Every batch size n with 41 <= n < 80; fresh tester per size.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, batch_gt_40) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1041
// Steps n from 1 by 39 while n <= 200; each tester has inplace(true) set.
TEST(F32_VSQRT__NEONFMA_NR2FMA1ADJ_X40, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40);
  }
}
1051 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1052
1053
1054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 4, the number in the kernel's x4 suffix.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
}
1061
// Multiples of 4 from 8 up to, but not including, 40; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1070
// Every batch size n with 1 <= n < 4; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1079
// Every batch size n with 5 <= n < 8; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1088
// Steps n from 1 by 3 while n <= 20; each tester has inplace(true) set.
TEST(F32_VSQRT__SSE_SQRT_X4, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x4);
  }
}
1098 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1099
1100
1101 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 8, the number in the kernel's x8 suffix.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
}
1108
// Multiples of 8 from 16 up to, but not including, 80; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1117
// Every batch size n with 1 <= n < 8; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1126
// Every batch size n with 9 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__SSE_SQRT_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1135
// Steps n from 1 by 7 while n <= 40; each tester has inplace(true) set.
TEST(F32_VSQRT__SSE_SQRT_X8, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__sse_sqrt_x8);
  }
}
1145 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1146
1147
1148 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 8, the number in the kernel's x8 suffix;
// xnn_init_f32_sqrt_avx_params is passed as the second Test() argument.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
}
1155
// Multiples of 8 from 16 up to, but not including, 80; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1164
// Every batch size n with 1 <= n < 8; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1173
// Every batch size n with 9 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1182
// Steps n from 1 by 7 while n <= 40; each tester has inplace(true) set.
TEST(F32_VSQRT__AVX_SQRT_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x8, xnn_init_f32_sqrt_avx_params);
  }
}
1192 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1193
1194
1195 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 16, the number in the kernel's x16 suffix;
// xnn_init_f32_sqrt_avx_params is passed as the second Test() argument.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
}
1202
// Multiples of 16 from 32 up to, but not including, 160; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1211
// Every batch size n with 1 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1220
// Every batch size n with 17 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__AVX_SQRT_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1229
// Steps n from 1 by 15 while n <= 80; each tester has inplace(true) set.
TEST(F32_VSQRT__AVX_SQRT_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx_sqrt_x16, xnn_init_f32_sqrt_avx_params);
  }
}
1239 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1240
1241
1242 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 8, the number in the kernel's x8 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
}
1249
// Multiples of 8 from 16 up to, but not including, 80; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1258
// Every batch size n with 1 <= n < 8; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1267
// Every batch size n with 9 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, batch_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1276
// Steps n from 1 by 7 while n <= 40; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X8, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8, xnn_init_f32_sqrt_fma_params);
  }
}
1286 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1287
1288
1289 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 16, the number in the kernel's x16 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
}
1296
// Multiples of 16 from 32 up to, but not including, 160; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1305
// Every batch size n with 1 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1314
// Every batch size n with 17 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, batch_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1323
// Steps n from 1 by 15 while n <= 80; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X16, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16, xnn_init_f32_sqrt_fma_params);
  }
}
1333 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1334
1335
1336 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 24, the number in the kernel's x24 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_eq_24) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
}
1343
// Multiples of 24 from 48 up to, but not including, 240; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_div_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1352
// Every batch size n with 1 <= n < 24; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_lt_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1361
// Every batch size n with 25 <= n < 48; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, batch_gt_24) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1370
// Steps n from 1 by 23 while n <= 120; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X24, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24, xnn_init_f32_sqrt_fma_params);
  }
}
1380 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1381
1382
1383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 32, the number in the kernel's x32 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
}
1390
// Multiples of 32 from 64 up to, but not including, 320; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_div_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1399
// Every batch size n with 1 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1408
// Every batch size n with 33 <= n < 64; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, batch_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1417
// Steps n from 1 by 31 while n <= 160; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X32, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32, xnn_init_f32_sqrt_fma_params);
  }
}
1427 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1428
1429
1430 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 40, the number in the kernel's x40 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_eq_40) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
}
1437
// Multiples of 40 from 80 up to, but not including, 400; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_div_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1446
// Every batch size n with 1 <= n < 40; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_lt_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1455
// Every batch size n with 41 <= n < 80; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, batch_gt_40) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1464
// Steps n from 1 by 39 while n <= 200; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X40, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40, xnn_init_f32_sqrt_fma_params);
  }
}
1474 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1475
1476
1477 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 48, the number in the kernel's x48 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_eq_48) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
}
1484
// Multiples of 48 from 96 up to, but not including, 480; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_div_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1493
// Every batch size n with 1 <= n < 48; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_lt_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1502
// Every batch size n with 49 <= n < 96; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, batch_gt_48) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1511
// Steps n from 1 by 47 while n <= 240; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X48, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48, xnn_init_f32_sqrt_fma_params);
  }
}
1521 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1522
1523
1524 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 56, the number in the kernel's x56 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_eq_56) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(56);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
}
1531
// Multiples of 56 from 112 up to, but not including, 560; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_div_56) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1540
// Every batch size n with 1 <= n < 56; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_lt_56) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1549
// Every batch size n with 57 <= n < 112; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, batch_gt_56) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1558
// Steps n from 1 by 55 while n <= 280; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X56, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56, xnn_init_f32_sqrt_fma_params);
  }
}
1568 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1569
1570
1571 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 64, the number in the kernel's x64 suffix;
// xnn_init_f32_sqrt_fma_params is passed as the second Test() argument.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_eq_64) {
  TEST_REQUIRES_X86_FMA3;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
}
1578
// Multiples of 64 from 128 up to, but not including, 640; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_div_64) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
  }
}
1587
// Every batch size n with 1 <= n < 64; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_lt_64) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
  }
}
1596
// Every batch size n with 65 <= n < 128; fresh tester per size.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, batch_gt_64) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
  }
}
1605
// Steps n from 1 by 63 while n <= 320; each tester has inplace(true) set.
TEST(F32_VSQRT__FMA3_NR1FMA1ADJ_X64, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64, xnn_init_f32_sqrt_fma_params);
  }
}
1615 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1616
1617
1618 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 16, the number in the kernel's x16 suffix;
// xnn_init_f32_sqrt_avx512_params is passed as the second Test() argument.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
}
1625
// Multiples of 16 from 32 up to, but not including, 160; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
  }
}
1634
// Every batch size n with 1 <= n < 16; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
  }
}
1643
// Every batch size n with 17 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
  }
}
1652
// Steps n from 1 by 15 while n <= 80; each tester has inplace(true) set.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16, xnn_init_f32_sqrt_avx512_params);
  }
}
1662 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1663
1664
1665 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with batch_size set to 32, the number in the kernel's x32 suffix;
// xnn_init_f32_sqrt_avx512_params is passed as the second Test() argument.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
}
1672
// Multiples of 32 from 64 up to, but not including, 320; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
  }
}
1681
// Every batch size n with 1 <= n < 32; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
  }
}
1690
// Every batch size n with 33 <= n < 64; fresh tester per size.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
  }
}
1699
// Steps n from 1 by 31 while n <= 160; each tester has inplace(true) set.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32, xnn_init_f32_sqrt_avx512_params);
  }
}
1709 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1710
1711
1712 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x48 AVX512F kernel with a batch size of exactly 48.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
}
1719
// Runs the x48 AVX512F kernel for batch sizes 96 through 432 in steps of 48.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
  }
}
1728
// Runs the x48 AVX512F kernel for every batch size from 1 through 47.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
  }
}
1737
// Runs the x48 AVX512F kernel for every batch size from 49 through 95.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
  }
}
1746
// Runs the x48 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 48, 95, ... (step 47) up to and including 240.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48, xnn_init_f32_sqrt_avx512_params);
  }
}
1756 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1757
1758
1759 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x64 AVX512F kernel with a batch size of exactly 64.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
}
1766
// Runs the x64 AVX512F kernel for batch sizes 128 through 576 in steps of 64.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
  }
}
1775
// Runs the x64 AVX512F kernel for every batch size from 1 through 63.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
  }
}
1784
// Runs the x64 AVX512F kernel for every batch size from 65 through 127.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
  }
}
1793
// Runs the x64 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 64, 127, ... (step 63) up to and including 320.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64, xnn_init_f32_sqrt_avx512_params);
  }
}
1803 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1804
1805
1806 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x80 AVX512F kernel with a batch size of exactly 80.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
}
1813
// Runs the x80 AVX512F kernel for batch sizes 160 through 720 in steps of 80.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
  }
}
1822
// Runs the x80 AVX512F kernel for every batch size from 1 through 79.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
  }
}
1831
// Runs the x80 AVX512F kernel for every batch size from 81 through 159.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
  }
}
1840
// Runs the x80 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 80, 159, ... (step 79) up to and including 400.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80, xnn_init_f32_sqrt_avx512_params);
  }
}
1850 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1851
1852
1853 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x96 AVX512F kernel with a batch size of exactly 96.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(96);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
}
1860
// Runs the x96 AVX512F kernel for batch sizes 192 through 864 in steps of 96.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 192; n < 960; n += 96) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
  }
}
1869
// Runs the x96 AVX512F kernel for every batch size from 1 through 95.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
  }
}
1878
// Runs the x96 AVX512F kernel for every batch size from 97 through 191.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
  }
}
1887
// Runs the x96 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 96, 191, ... (step 95) up to and including 480.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96, xnn_init_f32_sqrt_avx512_params);
  }
}
1897 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1898
1899
1900 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x112 AVX512F kernel with a batch size of exactly 112.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(112);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
}
1907
// Runs the x112 AVX512F kernel for batch sizes 224 through 1008 in steps of 112.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 224; n < 1120; n += 112) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
  }
}
1916
// Runs the x112 AVX512F kernel for every batch size from 1 through 111.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
  }
}
1925
// Runs the x112 AVX512F kernel for every batch size from 113 through 223.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
  }
}
1934
// Runs the x112 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 112, 223, ... (step 111) up to and including 560.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112, xnn_init_f32_sqrt_avx512_params);
  }
}
1944 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1945
1946
1947 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single tester run of the x128 AVX512F kernel with a batch size of exactly 128.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(128);
  tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
}
1954
// Runs the x128 AVX512F kernel for batch sizes 256 through 1152 in steps of 128.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 256; n < 1280; n += 128) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
  }
}
1963
// Runs the x128 AVX512F kernel for every batch size from 1 through 127.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
  }
}
1972
// Runs the x128 AVX512F kernel for every batch size from 129 through 255.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
  }
}
1981
// Runs the x128 AVX512F kernel through the tester with inplace(true) for
// batch sizes 1, 128, 255, ... (step 127) up to and including 640.
TEST(F32_VSQRT__AVX512F_NR1FMA1ADJ_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128, xnn_init_f32_sqrt_avx512_params);
  }
}
1991 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1992
1993
1994 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single tester run of the x4 WAsm SIMD kernel with a batch size of exactly 4.
TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
}
2000
// Runs the x4 WAsm SIMD kernel for batch sizes 8 through 36 in steps of 4.
TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
  }
}
2008
// Runs the x4 WAsm SIMD kernel for batch sizes 1, 2 and 3.
TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
  }
}
2016
// Runs the x4 WAsm SIMD kernel for batch sizes 5, 6 and 7.
TEST(F32_VSQRT__WASMSIMD_SQRT_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
  }
}
2024
// Runs the x4 WAsm SIMD kernel through the tester with inplace(true) for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
TEST(F32_VSQRT__WASMSIMD_SQRT_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4);
  }
}
2033 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2034
2035
2036 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single tester run of the x8 WAsm SIMD kernel with a batch size of exactly 8.
TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_eq_8) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
}
2042
// Runs the x8 WAsm SIMD kernel for batch sizes 16 through 72 in steps of 8.
TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
  }
}
2050
// Runs the x8 WAsm SIMD kernel for every batch size from 1 through 7.
TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
  }
}
2058
// Runs the x8 WAsm SIMD kernel for every batch size from 9 through 15.
TEST(F32_VSQRT__WASMSIMD_SQRT_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
  }
}
2066
// Runs the x8 WAsm SIMD kernel through the tester with inplace(true) for
// batch sizes 1, 8, 15, ... (step 7) up to and including 40.
TEST(F32_VSQRT__WASMSIMD_SQRT_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8);
  }
}
2075 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2076
2077
// Single tester run of the x1 scalar kernel with a batch size of exactly 1.
TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_eq_1) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(1);
  tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
}
2083
// Runs the x1 scalar kernel for every batch size from 2 through 9.
TEST(F32_VSQRT__SCALAR_SQRT_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
  }
}
2091
// Runs the x1 scalar kernel through the tester with inplace(true) for
// every batch size from 1 through 5.
TEST(F32_VSQRT__SCALAR_SQRT_X1, inplace) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x1);
  }
}
2100
2101
// Single tester run of the x2 scalar kernel with a batch size of exactly 2.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_eq_2) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(2);
  tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
}
2107
// Runs the x2 scalar kernel for batch sizes 4 through 18 in steps of 2.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2115
// Runs the x2 scalar kernel for batch sizes below 2; the loop bounds admit
// only a single iteration, with batch size 1.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2123
// Runs the x2 scalar kernel for batch sizes in [3, 4); the loop bounds admit
// only a single iteration, with batch size 3.
TEST(F32_VSQRT__SCALAR_SQRT_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2131
// Runs the x2 scalar kernel through the tester with inplace(true) for
// every batch size from 1 through 10.
TEST(F32_VSQRT__SCALAR_SQRT_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x2);
  }
}
2140
2141
// Single tester run of the x4 scalar kernel with a batch size of exactly 4.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
}
2147
// Runs the x4 scalar kernel for batch sizes 8 through 36 in steps of 4.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2155
// Runs the x4 scalar kernel for batch sizes 1, 2 and 3.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2163
// Runs the x4 scalar kernel for batch sizes 5, 6 and 7.
TEST(F32_VSQRT__SCALAR_SQRT_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2171
// Runs the x4 scalar kernel through the tester with inplace(true) for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
TEST(F32_VSQRT__SCALAR_SQRT_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsqrt_ukernel__scalar_sqrt_x4);
  }
}
2180