// xref: /aosp_15_r20/external/XNNPACK/test/f32-vsqrdiff.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-vsqrdiff.yaml
//   Generator: tools/generate-vbinary-test.py
9 
10 
#include <cstddef>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/microparams-init.h>
#include <xnnpack/vbinary.h>
#include "vbinary-microkernel-tester.h"
19 
20 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_vsqrdiff_ukernel__neon_x4 (built only for ARM/ARM64).
  // Each test starts with TEST_REQUIRES_ARM_NEON and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_ARM_NEON presumably skips the test when NEON is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 4 elements.
  TEST(F32_VSQRDIFF__NEON_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VBinaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_VSQRDIFF__NEON_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_VSQRDIFF__NEON_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_VSQRDIFF__NEON_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__NEON_X4, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__NEON_X4, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__NEON_X4, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
87 
88 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_vsqrdiff_ukernel__neon_x8 (built only for ARM/ARM64).
  // Each test starts with TEST_REQUIRES_ARM_NEON and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_ARM_NEON presumably skips the test when NEON is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 8 elements.
  TEST(F32_VSQRDIFF__NEON_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VBinaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_VSQRDIFF__NEON_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_VSQRDIFF__NEON_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRDIFF__NEON_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__NEON_X8, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__NEON_X8, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__NEON_X8, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__neon_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
155 
156 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__sse_x4 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_SSE and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_X86_SSE presumably skips the test when SSE is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 4 elements.
  TEST(F32_VSQRDIFF__SSE_X4, batch_eq_4) {
    TEST_REQUIRES_X86_SSE;
    VBinaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_VSQRDIFF__SSE_X4, batch_div_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_VSQRDIFF__SSE_X4, batch_lt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_VSQRDIFF__SSE_X4, batch_gt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__SSE_X4, inplace_a) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__SSE_X4, inplace_b) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__SSE_X4, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
223 
224 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__sse_x8 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_SSE and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_X86_SSE presumably skips the test when SSE is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 8 elements.
  TEST(F32_VSQRDIFF__SSE_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE;
    VBinaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_VSQRDIFF__SSE_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_VSQRDIFF__SSE_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRDIFF__SSE_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__SSE_X8, inplace_a) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__SSE_X8, inplace_b) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__SSE_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__sse_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
291 
292 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__avx_x8 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_AVX and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff, passing
  // xnn_init_f32_default_avx_params as the third argument to Test().
  // NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test when AVX is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 8 elements.
  TEST(F32_VSQRDIFF__AVX_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    VBinaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_VSQRDIFF__AVX_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_VSQRDIFF__AVX_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRDIFF__AVX_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__AVX_X8, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__AVX_X8, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__AVX_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x8, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
359 
360 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__avx_x16 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_AVX and drives the kernel through
  // VBinaryMicrokernelTester with OpType::SqrDiff, passing
  // xnn_init_f32_default_avx_params as the third argument to Test().
  // NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test when AVX is
  // unavailable, and inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in isa-checks.h / vbinary-microkernel-tester.h.

  // Batch of exactly 16 elements.
  TEST(F32_VSQRDIFF__AVX_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    VBinaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_VSQRDIFF__AVX_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_VSQRDIFF__AVX_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_VSQRDIFF__AVX_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx_x16, VBinaryMicrokernelTester::OpType::SqrDiff, xnn_init_f32_default_avx_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
427 
428 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__avx512f_x16 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_AVX512F and drives the kernel
  // through VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_X86_AVX512F presumably skips the test when
  // AVX512F is unavailable, and inplace_a/inplace_b presumably alias the output
  // with the respective input -- confirm in isa-checks.h /
  // vbinary-microkernel-tester.h.

  // Batch of exactly 16 elements.
  TEST(F32_VSQRDIFF__AVX512F_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512F;
    VBinaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_VSQRDIFF__AVX512F_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_VSQRDIFF__AVX512F_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_VSQRDIFF__AVX512F_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX512F_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX512F_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__AVX512F_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
495 
496 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_vsqrdiff_ukernel__avx512f_x32 (built only for x86/x86-64).
  // Each test starts with TEST_REQUIRES_X86_AVX512F and drives the kernel
  // through VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): TEST_REQUIRES_X86_AVX512F presumably skips the test when
  // AVX512F is unavailable, and inplace_a/inplace_b presumably alias the output
  // with the respective input -- confirm in isa-checks.h /
  // vbinary-microkernel-tester.h.

  // Batch of exactly 32 elements.
  TEST(F32_VSQRDIFF__AVX512F_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512F;
    VBinaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 32: 64, 96, ..., 288.
  TEST(F32_VSQRDIFF__AVX512F_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 32: 1 through 31.
  TEST(F32_VSQRDIFF__AVX512F_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 32 and 64: 33 through 63.
  TEST(F32_VSQRDIFF__AVX512F_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 32, 63, ..., 156.
  TEST(F32_VSQRDIFF__AVX512F_X32, inplace_a) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 32, 63, ..., 156.
  TEST(F32_VSQRDIFF__AVX512F_X32, inplace_b) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 32, 63, ..., 156.
  TEST(F32_VSQRDIFF__AVX512F_X32, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__avx512f_x32, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
563 
564 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_f32_vsqrdiff_ukernel__wasmsimd_x4 (built only for WAsm SIMD /
  // relaxed-SIMD targets). No runtime ISA check is performed; each test drives
  // the kernel through VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in vbinary-microkernel-tester.h.

  // Batch of exactly 4 elements.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, batch_eq_4) {
    VBinaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, batch_div_4) {
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, batch_lt_4) {
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, batch_gt_4) {
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, inplace_a) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, inplace_b) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 4, 7, ..., 19.
  TEST(F32_VSQRDIFF__WASMSIMD_X4, inplace_a_and_b) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
624 
625 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_f32_vsqrdiff_ukernel__wasmsimd_x8 (built only for WAsm SIMD /
  // relaxed-SIMD targets). No runtime ISA check is performed; each test drives
  // the kernel through VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in vbinary-microkernel-tester.h.

  // Batch of exactly 8 elements.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, batch_eq_8) {
    VBinaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, batch_div_8) {
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, batch_lt_8) {
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, batch_gt_8) {
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, inplace_a) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, inplace_b) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 8, 15, ..., 36.
  TEST(F32_VSQRDIFF__WASMSIMD_X8, inplace_a_and_b) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
685 
686 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_f32_vsqrdiff_ukernel__wasmsimd_x16 (built only for WAsm SIMD /
  // relaxed-SIMD targets). No runtime ISA check is performed; each test drives
  // the kernel through VBinaryMicrokernelTester with OpType::SqrDiff.
  // NOTE(review): inplace_a/inplace_b presumably alias the output with the
  // respective input -- confirm in vbinary-microkernel-tester.h.

  // Batch of exactly 16 elements.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, batch_eq_16) {
    VBinaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, batch_div_16) {
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, batch_lt_16) {
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, batch_gt_16) {
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_a flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, inplace_a) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // inplace_b flag set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, inplace_b) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }

  // Both inplace_a and inplace_b flags set; batch sizes 1, 16, 31, ..., 76.
  TEST(F32_VSQRDIFF__WASMSIMD_X16, inplace_a_and_b) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VBinaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_f32_vsqrdiff_ukernel__wasmsimd_x16, VBinaryMicrokernelTester::OpType::SqrDiff);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
746 
747 
TEST(F32_VSQRDIFF__SCALAR_X1,batch_eq_1)748 TEST(F32_VSQRDIFF__SCALAR_X1, batch_eq_1) {
749   VBinaryMicrokernelTester()
750     .batch_size(1)
751     .Test(xnn_f32_vsqrdiff_ukernel__scalar_x1, VBinaryMicrokernelTester::OpType::SqrDiff);
752 }
753 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x1 through the tester with
// OpType::SqrDiff for every batch size from 2 through 9.
TEST(F32_VSQRDIFF__SCALAR_X1, batch_gt_1) {
  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x1, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
761 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x1 with inplace_a(true) set on the
// tester, for every batch size from 1 through 5.
// NOTE(review): inplace_a presumably makes the output alias input A —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X1, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x1, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
770 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x1 with inplace_b(true) set on the
// tester, for every batch size from 1 through 5.
// NOTE(review): inplace_b presumably makes the output alias input B —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X1, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x1, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
779 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x1 with both inplace_a(true) and
// inplace_b(true) set on the tester, for every batch size from 1 through 5.
// NOTE(review): the exact aliasing this combination produces is defined by
// the tester — confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X1, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x1, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
789 
790 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 through the tester with
// OpType::SqrDiff and a batch size of exactly 2.
TEST(F32_VSQRDIFF__SCALAR_X2, batch_eq_2) {
  VBinaryMicrokernelTester()
    .batch_size(2)
    .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
}
796 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 through the tester with
// OpType::SqrDiff for the even batch sizes 4, 6, ..., 18.
TEST(F32_VSQRDIFF__SCALAR_X2, batch_div_2) {
  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
804 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 through the tester with
// OpType::SqrDiff for batch sizes below 2; the loop bounds admit only
// batch_size == 1, so the body executes once.
TEST(F32_VSQRDIFF__SCALAR_X2, batch_lt_2) {
  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
812 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 through the tester with
// OpType::SqrDiff for batch sizes in [3, 4); the loop bounds admit only
// batch_size == 3, so the body executes once.
TEST(F32_VSQRDIFF__SCALAR_X2, batch_gt_2) {
  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
820 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 with inplace_a(true) set on the
// tester, for every batch size from 1 through 10.
// NOTE(review): inplace_a presumably makes the output alias input A —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X2, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
829 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 with inplace_b(true) set on the
// tester, for every batch size from 1 through 10.
// NOTE(review): inplace_b presumably makes the output alias input B —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X2, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
838 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x2 with both inplace_a(true) and
// inplace_b(true) set on the tester, for every batch size from 1 through 10.
// NOTE(review): the exact aliasing this combination produces is defined by
// the tester — confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X2, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x2, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
848 
849 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 through the tester with
// OpType::SqrDiff and a batch size of exactly 4.
TEST(F32_VSQRDIFF__SCALAR_X4, batch_eq_4) {
  VBinaryMicrokernelTester()
    .batch_size(4)
    .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
}
855 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 through the tester with
// OpType::SqrDiff for the multiples of 4 from 8 through 36.
TEST(F32_VSQRDIFF__SCALAR_X4, batch_div_4) {
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
863 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 through the tester with
// OpType::SqrDiff for every batch size from 1 through 3.
TEST(F32_VSQRDIFF__SCALAR_X4, batch_lt_4) {
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
871 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 through the tester with
// OpType::SqrDiff for every batch size from 5 through 7.
TEST(F32_VSQRDIFF__SCALAR_X4, batch_gt_4) {
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
879 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 with inplace_a(true) set on the
// tester, for batch sizes 1, 4, 7, 10, 13, 16 and 19 (step of 3 up to 20).
// NOTE(review): inplace_a presumably makes the output alias input A —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X4, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
888 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 with inplace_b(true) set on the
// tester, for batch sizes 1, 4, 7, 10, 13, 16 and 19 (step of 3 up to 20).
// NOTE(review): inplace_b presumably makes the output alias input B —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X4, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
897 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x4 with both inplace_a(true) and
// inplace_b(true) set on the tester, for batch sizes 1, 4, 7, 10, 13, 16 and
// 19 (step of 3 up to 20).
// NOTE(review): the exact aliasing this combination produces is defined by
// the tester — confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X4, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x4, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
907 
908 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 through the tester with
// OpType::SqrDiff and a batch size of exactly 8.
TEST(F32_VSQRDIFF__SCALAR_X8, batch_eq_8) {
  VBinaryMicrokernelTester()
    .batch_size(8)
    .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
}
914 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 through the tester with
// OpType::SqrDiff for the multiples of 8 from 16 through 72.
TEST(F32_VSQRDIFF__SCALAR_X8, batch_div_8) {
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
922 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 through the tester with
// OpType::SqrDiff for every batch size from 1 through 7.
TEST(F32_VSQRDIFF__SCALAR_X8, batch_lt_8) {
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
930 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 through the tester with
// OpType::SqrDiff for every batch size from 9 through 15.
TEST(F32_VSQRDIFF__SCALAR_X8, batch_gt_8) {
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
938 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 with inplace_a(true) set on the
// tester, for batch sizes 1, 8, 15, 22, 29 and 36 (step of 7 up to 40).
// NOTE(review): inplace_a presumably makes the output alias input A —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X8, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
947 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 with inplace_b(true) set on the
// tester, for batch sizes 1, 8, 15, 22, 29 and 36 (step of 7 up to 40).
// NOTE(review): inplace_b presumably makes the output alias input B —
// confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X8, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
956 
// Runs xnn_f32_vsqrdiff_ukernel__scalar_x8 with both inplace_a(true) and
// inplace_b(true) set on the tester, for batch sizes 1, 8, 15, 22, 29 and 36
// (step of 7 up to 40).
// NOTE(review): the exact aliasing this combination produces is defined by
// the tester — confirm in vbinary-microkernel-tester.h.
TEST(F32_VSQRDIFF__SCALAR_X8, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VBinaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_f32_vsqrdiff_ukernel__scalar_x8, VBinaryMicrokernelTester::OpType::SqrDiff);
  }
}
966