xref: /aosp_15_r20/external/XNNPACK/test/f16-dwconv-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f16-dwconv-minmax.yaml
//   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // One run with the channel count equal to cr (8).
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
      xnn_init_f16_minmax_neon_params);
}
32 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
43 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmin(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
55 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmax(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
67 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1 through 7, all below cr (8).
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
78 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15: above cr (8) and below 16.
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
89 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmin(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
101 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmax(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
113 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
125 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For channel counts 1, 8, 15, 22, 29, 36 and width(3), tries step values 2 and 3.
  for (size_t c = 1; c <= 36; c += 7) {
    for (size_t s = 2; s < 4; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
140 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
  // output_stride(43). The loop variable is now forwarded to channels();
  // the previous hard-coded channels(8) made all six iterations identical.
  // 43 remains larger than every channel count in the sweep.
  // NOTE(review): this file is generated; the same change belongs in the
  // generator template (tools/generate-dwconv-test.py).
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
153 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmin(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
166 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmax(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
179 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112, each run with input_offset(176).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
191 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For each zero_index in 0..2, sweeps channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
206 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
207 
208 
209 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // One run with the channel count equal to cr (8).
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
      xnn_init_f16_minmax_neon_params);
}
218 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
229 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmin(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
241 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmax(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
253 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1 through 7, all below cr (8).
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
264 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15: above cr (8) and below 16.
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
275 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmin(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
287 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmax(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
299 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
311 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For channel counts 1, 8, 15, 22, 29, 36 and width(3), tries step values 2 and 3.
  for (size_t c = 1; c <= 36; c += 7) {
    for (size_t s = 2; s < 4; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
326 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
  // output_stride(43). The loop variable is now forwarded to channels();
  // the previous hard-coded channels(8) made all six iterations identical.
  // 43 remains larger than every channel count in the sweep.
  // NOTE(review): this file is generated; the same change belongs in the
  // generator template (tools/generate-dwconv-test.py).
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
339 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmin(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
352 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmax(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
365 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112, each run with input_offset(176).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
377 
TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For each zero_index in 0..2, sweeps channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
392 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
393 
394 
395 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // One run with the channel count equal to cr (8).
  DWConvMicrokernelTester().cr(8).kr(4).channels(8).Test(
      xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
      xnn_init_f16_minmax_neon_params);
}
404 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
415 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmin(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
427 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmax(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
439 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1 through 7, all below cr (8).
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
450 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15: above cr (8) and below 16.
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
461 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmin(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
473 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmax(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
485 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
497 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For channel counts 1, 8, 15, 22, 29, 36 and width(3), tries step values 2, 3 and 4.
  for (size_t c = 1; c <= 36; c += 7) {
    for (size_t s = 2; s < 5; ++s) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
512 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
  // output_stride(43). The loop variable is now forwarded to channels();
  // the previous hard-coded channels(8) made all six iterations identical.
  // 43 remains larger than every channel count in the sweep.
  // NOTE(review): this file is generated; the same change belongs in the
  // generator template (tools/generate-dwconv-test.py).
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
525 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmin(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
538 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmax(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
551 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112, each run with input_offset(176).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
        xnn_init_f16_minmax_neon_params);
  }
}
563 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For each zero_index in 0..3, sweeps channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
578 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
579 
580 
581 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // One run with the channel count equal to cr (8).
  DWConvMicrokernelTester().cr(8).kr(4).channels(8).Test(
      xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
      xnn_init_f16_minmax_neon_params);
}
590 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
601 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmin(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
613 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8), each run with qmax(128).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
625 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1 through 7, all below cr (8).
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
636 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15: above cr (8) and below 16.
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
647 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmin(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
659 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 9 through 15, each run with qmax(128).
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
671 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
683 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For channel counts 1, 8, 15, 22, 29, 36 and width(3), tries step values 2, 3 and 4.
  for (size_t c = 1; c <= 36; c += 7) {
    for (size_t s = 2; s < 5; ++s) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
698 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
  // output_stride(43). The loop variable is now forwarded to channels();
  // the previous hard-coded channels(8) made all six iterations identical.
  // 43 remains larger than every channel count in the sweep.
  // NOTE(review): this file is generated; the same change belongs in the
  // generator template (tools/generate-dwconv-test.py).
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
711 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmin(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
724 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 1, 8, 15, 22, 29, 36, each run with width(3) and qmax(128).
  for (size_t c = 1; c <= 36; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
737 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 16, 40, 64, 88, 112, each run with input_offset(176).
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).Test(
        xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
        xnn_init_f16_minmax_neon_params);
  }
}
749 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // For each zero_index in 0..3, sweeps channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2,
          xnn_init_f16_minmax_neon_params);
    }
  }
}
764 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
765 
766 
767 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // One run with the channel count equal to cr (8).
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith,
      xnn_init_f16_minmax_neon_params);
}
776 
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
787 
// Channel counts 16, 40, 64, 88, 112 with qmin(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
799 
// Channel counts 16, 40, 64, 88, 112 with qmax(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
811 
// Channel counts below 8: 1 through 7 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
822 
// Channel counts 9 through 15 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
833 
// Channel counts 9 through 15 with qmin(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
845 
// Channel counts 9 through 15 with qmax(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
857 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
869 
// Width-3 runs for every step in [2, 9], for each channel count 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
884 
// Width-5 runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36
// (cr = 8, kr = 9). Each iteration passes the loop's channel count to
// channels() so the six runs cover different channel counts rather than
// repeating channels == 8; the largest count (36) stays below the stride of 43.
// NOTE: this file is produced by tools/generate-dwconv-test.py; apply the same
// change to the generator template so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
897 
// Width-3 runs with qmin(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
910 
// Width-3 runs with qmax(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
923 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
935 
// For every zero_index in [0, 9): channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_pos)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
950 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
951 
952 
953 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single configuration with channels == cr == 8 (kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
962 
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
973 
// Channel counts 16, 40, 64, 88, 112 with qmin(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
985 
// Channel counts 16, 40, 64, 88, 112 with qmax(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
997 
// Channel counts below 8: 1 through 7 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1008 
// Channel counts 9 through 15 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1019 
// Channel counts 9 through 15 with qmin(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1031 
// Channel counts 9 through 15 with qmax(128) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1043 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1055 
// Width-3 runs for every step in [2, 9], for each channel count 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1070 
// Width-5 runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36
// (cr = 8, kr = 9). Each iteration passes the loop's channel count to
// channels() so the six runs cover different channel counts rather than
// repeating channels == 8; the largest count (36) stays below the stride of 43.
// NOTE: this file is produced by tools/generate-dwconv-test.py; apply the same
// change to the generator template so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1083 
// Width-3 runs with qmin(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1096 
// Width-3 runs with qmax(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1109 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1121 
// For every zero_index in [0, 9): channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 9).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_pos)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1136 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1137 
1138 
1139 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single configuration with channels == cr == 8 (kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
1148 
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1159 
// Channel counts 16, 40, 64, 88, 112 with qmin(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1171 
// Channel counts 16, 40, 64, 88, 112 with qmax(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1183 
// Channel counts below 8: 1 through 7 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1194 
// Channel counts 9 through 15 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1205 
// Channel counts 9 through 15 with qmin(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1217 
// Channel counts 9 through 15 with qmax(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1229 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1241 
// Width-3 runs for every step in [2, 25], for each channel count 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
1256 
// Width-5 runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36
// (cr = 8, kr = 25). Each iteration passes the loop's channel count to
// channels() so the six runs cover different channel counts rather than
// repeating channels == 8; the largest count (36) stays below the stride of 43.
// NOTE: this file is produced by tools/generate-dwconv-test.py; apply the same
// change to the generator template so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1269 
// Width-3 runs with qmin(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1282 
// Width-3 runs with qmax(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1295 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1307 
// For every zero_index in [0, 25): channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_pos = 0; zero_pos < 25; ++zero_pos) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_pos)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
1322 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1323 
1324 
1325 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single configuration with channels == cr == 8 (kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
1334 
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1345 
// Channel counts 16, 40, 64, 88, 112 with qmin(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1357 
// Channel counts 16, 40, 64, 88, 112 with qmax(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1369 
// Channel counts below 8: 1 through 7 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1380 
// Channel counts 9 through 15 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1391 
// Channel counts 9 through 15 with qmin(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1403 
// Channel counts 9 through 15 with qmax(128) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1415 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1427 
// Width-3 runs for every step in [2, 25], for each channel count 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1442 
// Width-5 runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36
// (cr = 8, kr = 25). Each iteration passes the loop's channel count to
// channels() so the six runs cover different channel counts rather than
// repeating channels == 8; the largest count (36) stays below the stride of 43.
// NOTE: this file is produced by tools/generate-dwconv-test.py; apply the same
// change to the generator template so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1455 
// Width-3 runs with qmin(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1468 
// Width-3 runs with qmax(128) over channel counts 1, 8, 15, 22, 29, 36 (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1481 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1493 
// For every zero_index in [0, 25): channel counts 16, 40, 64, 88, 112 with input_offset(176) (cr = 8, kr = 25).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_pos = 0; zero_pos < 25; ++zero_pos) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_pos)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1508 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1509 
1510 
1511 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single configuration with channels == cr == 16 (kr = 3).
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
1520 
// Channel counts that are multiples of 16: 32, 80, 128, 176, 224 (cr = 16, kr = 3).
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1531 
// Sweeps channels = 32, 80, 128, 176, 224 with qmin(128) on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1543 
// Sweeps channels = 32, 80, 128, 176, 224 with qmax(128) on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1555 
// Covers every channel count from 1 through 15 on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1566 
// Covers every channel count from 17 through 31 on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1577 
// Covers channel counts 17 through 31 with qmin(128) on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1589 
// Covers channel counts 17 through 31 with qmax(128) on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1601 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1613 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76, each with step values 2 and 3,
// on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(3).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
1628 
// Runs width(5) with output_stride(83) for channels = 1, 16, 31, 46, 61, 76
// on the up16x3 ukernel.
//
// The original body hard-coded .channels(16) and never read the loop variable,
// so the identical configuration ran six times. The loop variable is now
// passed through; every value stays below the output stride of 83.
// NOTE(review): this file is generated, so the same fix presumably belongs in
// the generator template as well - confirm before regenerating.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1641 
// Runs width(3) with qmin(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1654 
// Runs width(3) with qmax(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1667 
// Sweeps channels = 32, 80, 128, 176, 224 with input_offset(304) on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1679 
// Crosses every zero_index in [0, 3) with channels = 32, 80, 128, 176, 224,
// all at input_offset(304), on the up16x3 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(3).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
1694 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1695 
1696 
1697 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run of the up16x3 acc2 ukernel with channels(16), the same value passed to cr().
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(3).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
1706 
// Sweeps channels = 32, 80, 128, 176, 224 (all multiples of 16) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1717 
// Sweeps channels = 32, 80, 128, 176, 224 with qmin(128) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1729 
// Sweeps channels = 32, 80, 128, 176, 224 with qmax(128) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1741 
// Covers every channel count from 1 through 15 on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1752 
// Covers every channel count from 17 through 31 on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1763 
// Covers channel counts 17 through 31 with qmin(128) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1775 
// Covers channel counts 17 through 31 with qmax(128) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1787 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1799 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76, each with step values 2 and 3,
// on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(3).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1814 
// Runs width(5) with output_stride(83) for channels = 1, 16, 31, 46, 61, 76
// on the up16x3 acc2 ukernel.
//
// The original body hard-coded .channels(16) and never read the loop variable,
// so the identical configuration ran six times. The loop variable is now
// passed through; every value stays below the output stride of 83.
// NOTE(review): this file is generated, so the same fix presumably belongs in
// the generator template as well - confirm before regenerating.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1827 
// Runs width(3) with qmin(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1840 
// Runs width(3) with qmax(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1853 
// Sweeps channels = 32, 80, 128, 176, 224 with input_offset(304) on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
1865 
// Crosses every zero_index in [0, 3) with channels = 32, 80, 128, 176, 224,
// all at input_offset(304), on the up16x3 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(3).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
1880 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1881 
1882 
1883 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run of the up16x4 ukernel with channels(16), the same value passed to cr().
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(4).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
1892 
// Sweeps channels = 32, 80, 128, 176, 224 (all multiples of 16) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1903 
// Sweeps channels = 32, 80, 128, 176, 224 with qmin(128) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1915 
// Sweeps channels = 32, 80, 128, 176, 224 with qmax(128) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1927 
// Covers every channel count from 1 through 15 on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1938 
// Covers every channel count from 17 through 31 on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1949 
// Covers channel counts 17 through 31 with qmin(128) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1961 
// Covers channel counts 17 through 31 with qmax(128) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1973 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
1985 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76, each with step values 2, 3 and 4,
// on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 4; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(4).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2000 
// Runs width(5) with output_stride(83) for channels = 1, 16, 31, 46, 61, 76
// on the up16x4 ukernel.
//
// The original body hard-coded .channels(16) and never read the loop variable,
// so the identical configuration ran six times. The loop variable is now
// passed through; every value stays below the output stride of 83.
// NOTE(review): this file is generated, so the same fix presumably belongs in
// the generator template as well - confirm before regenerating.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2013 
// Runs width(3) with qmin(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2026 
// Runs width(3) with qmax(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2039 
// Sweeps channels = 32, 80, 128, 176, 224 with input_offset(304) on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2051 
// Crosses every zero_index in [0, 4) with channels = 32, 80, 128, 176, 224,
// all at input_offset(304), on the up16x4 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(4).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2066 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2067 
2068 
2069 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run of the up16x4 acc2 ukernel with channels(16), the same value passed to cr().
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(4).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
2078 
// Sweeps channels = 32, 80, 128, 176, 224 (all multiples of 16) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2089 
// Sweeps channels = 32, 80, 128, 176, 224 with qmin(128) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2101 
// Sweeps channels = 32, 80, 128, 176, 224 with qmax(128) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2113 
// Covers every channel count from 1 through 15 on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2124 
// Covers every channel count from 17 through 31 on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2135 
// Covers channel counts 17 through 31 with qmin(128) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2147 
// Covers channel counts 17 through 31 with qmax(128) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2159 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2171 
// Runs width(3) for channels = 1, 16, 31, 46, 61, 76, each with step values 2, 3 and 4,
// on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 4; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(4).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2186 
// Runs width(5) with output_stride(83) for channels = 1, 16, 31, 46, 61, 76
// on the up16x4 acc2 ukernel.
//
// The original body hard-coded .channels(16) and never read the loop variable,
// so the identical configuration ran six times. The loop variable is now
// passed through; every value stays below the output stride of 83.
// NOTE(review): this file is generated, so the same fix presumably belongs in
// the generator template as well - confirm before regenerating.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2199 
// Runs width(3) with qmin(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2212 
// Runs width(3) with qmax(128) for channels = 1, 16, 31, 46, 61, 76 on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2225 
// Sweeps channels = 32, 80, 128, 176, 224 with input_offset(304) on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(4).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2237 
// Crosses every zero_index in [0, 4) with channels = 32, 80, 128, 176, 224,
// all at input_offset(304), on the up16x4 acc2 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(4).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2252 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2253 
2254 
2255 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run of the up16x9 ukernel with channels(16), the same value passed to cr().
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
2264 
// Sweeps channels = 32, 80, 128, 176, 224 (all multiples of 16) on the up16x9 ukernel.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2275 
// up16x9 kernel with qmin=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2287 
// up16x9 kernel with qmax=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2299 
// up16x9 kernel for every channel count from 1 through 15 (below cr=16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2310 
// up16x9 kernel for every channel count from 17 through 31 (above cr=16, below 2*cr).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2321 
// up16x9 kernel with qmin=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2333 
// up16x9 kernel with qmax=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2345 
// up16x9 kernel with width=3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2357 
// up16x9 kernel with width=3, combining channel counts 1, 16, 31, 46, 61, 76
// with every step value from 2 through 9.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(num_channels)
          .width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2372 
// up16x9 kernel with width=5 and output_stride=83 for channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop counter feeds channels() so each iteration covers a different
// channel count; a fixed channels(16) would make the sweep redundant.
// output_stride=83 stays above the largest channel count used (76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2385 
// up16x9 kernel with width=3 and qmin=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2398 
// up16x9 kernel with width=3 and qmax=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2411 
// up16x9 kernel with input_offset=304 for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2423 
// up16x9 kernel with input_offset=304, trying every zero_index below kr=9
// for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(num_channels)
          .input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2438 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2439 
2440 
2441 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// up16x9 acc2 kernel, single configuration: cr=16, kr=9, exactly 16 channels.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
2450 
// up16x9 acc2 kernel over channel counts 32, 80, 128, 176 and 224 (all multiples of cr=16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2461 
// up16x9 acc2 kernel with qmin=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2473 
// up16x9 acc2 kernel with qmax=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2485 
// up16x9 acc2 kernel for every channel count from 1 through 15 (below cr=16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2496 
// up16x9 acc2 kernel for every channel count from 17 through 31 (above cr=16, below 2*cr).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2507 
// up16x9 acc2 kernel with qmin=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2519 
// up16x9 acc2 kernel with qmax=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2531 
// up16x9 acc2 kernel with width=3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2543 
// up16x9 acc2 kernel with width=3, combining channel counts 1, 16, 31, 46, 61, 76
// with every step value from 2 through 9.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(num_channels)
          .width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2558 
// up16x9 acc2 kernel with width=5 and output_stride=83 for channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop counter feeds channels() so each iteration covers a different
// channel count; a fixed channels(16) would make the sweep redundant.
// output_stride=83 stays above the largest channel count used (76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2571 
// up16x9 acc2 kernel with width=3 and qmin=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2584 
// up16x9 acc2 kernel with width=3 and qmax=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2597 
// up16x9 acc2 kernel with input_offset=304 for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(num_channels)
        .input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2609 
// up16x9 acc2 kernel with input_offset=304, trying every zero_index below kr=9
// for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(num_channels)
          .input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2624 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2625 
2626 
2627 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// up16x25 kernel, single configuration: cr=16, kr=25, exactly 16 channels.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
2636 
// up16x25 kernel over channel counts 32, 80, 128, 176 and 224 (all multiples of cr=16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2647 
// up16x25 kernel with qmin=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2659 
// up16x25 kernel with qmax=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2671 
// up16x25 kernel for every channel count from 1 through 15 (below cr=16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2682 
// up16x25 kernel for every channel count from 17 through 31 (above cr=16, below 2*cr).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2693 
// up16x25 kernel with qmin=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2705 
// up16x25 kernel with qmax=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2717 
// up16x25 kernel with width=3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2729 
// up16x25 kernel with width=3, combining channel counts 1, 16, 31, 46, 61, 76
// with every step value from 2 through 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(25).channels(num_channels)
          .width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2744 
// up16x25 kernel with width=5 and output_stride=83 for channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop counter feeds channels() so each iteration covers a different
// channel count; a fixed channels(16) would make the sweep redundant.
// output_stride=83 stays above the largest channel count used (76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2757 
// up16x25 kernel with width=3 and qmin=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2770 
// up16x25 kernel with width=3 and qmax=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2783 
// up16x25 kernel with input_offset=304 for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
2795 
// up16x25 kernel with input_offset=304, trying every zero_index below kr=25
// for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(25).channels(num_channels)
          .input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
2810 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2811 
2812 
2813 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// up16x25 acc2 kernel, single configuration: cr=16, kr=25, exactly 16 channels.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
2822 
// up16x25 acc2 kernel over channel counts 32, 80, 128, 176 and 224 (all multiples of cr=16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2833 
// up16x25 acc2 kernel with qmin=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2845 
// up16x25 acc2 kernel with qmax=128 over channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2857 
// up16x25 acc2 kernel for every channel count from 1 through 15 (below cr=16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2868 
// up16x25 acc2 kernel for every channel count from 17 through 31 (above cr=16, below 2*cr).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2879 
// up16x25 acc2 kernel with qmin=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2891 
// up16x25 acc2 kernel with qmax=128 for every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2903 
// up16x25 acc2 kernel with width=3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2915 
// up16x25 acc2 kernel with width=3, combining channel counts 1, 16, 31, 46, 61, 76
// with every step value from 2 through 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(25).channels(num_channels)
          .width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2930 
// up16x25 acc2 kernel with width=5 and output_stride=83 for channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop counter feeds channels() so each iteration covers a different
// channel count; a fixed channels(16) would make the sweep redundant.
// output_stride=83 stays above the largest channel count used (76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2943 
// up16x25 acc2 kernel with width=3 and qmin=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2956 
// up16x25 acc2 kernel with width=3 and qmax=128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2969 
// up16x25 acc2 kernel with input_offset=304 for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(num_channels)
        .input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
2981 
// up16x25 acc2 kernel with input_offset=304, trying every zero_index below kr=25
// for channel counts 32, 80, 128, 176 and 224.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(25).channels(num_channels)
          .input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
2996 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
2997 
2998 
2999 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// up32x3 kernel, single configuration: cr=32, kr=3, exactly 32 channels.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(32).kr(3).channels(32).Test(
      xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
3008 
// Channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3019 
// Channel counts 64, 160, 256, 352, 448 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3031 
// Channel counts 64, 160, 256, 352, 448 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3043 
// Every channel count from 1 through 31 (below cr = 32).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3054 
// Every channel count from 33 through 63 (above cr = 32, below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3065 
// Channel counts 33 through 63 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3077 
// Channel counts 33 through 63 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3089 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3101 
// Width-3 runs for channel counts 1, 32, ..., 156, each with step 2 and step 3.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    for (size_t s = 2; s < 4; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3116 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is now forwarded to channels(); the previous
// hard-coded channels(32) made all six iterations run the same configuration.
// Every swept channel count stays below the 163 output stride.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3129 
// Width-3 runs with qmin set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3142 
// Width-3 runs with qmax set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3155 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).input_offset(592);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3167 
// For every zero_index in [0, 3), runs with input_offset(592) over channel
// counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx != 3; ++zero_idx) {
    for (uint32_t c = 64; c <= 448; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3182 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
3183 
3184 
3185 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run with channels equal to cr (32).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(3).channels(32);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
3194 
// Channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3205 
// Channel counts 64, 160, 256, 352, 448 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3217 
// Channel counts 64, 160, 256, 352, 448 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3229 
// Every channel count from 1 through 31 (below cr = 32).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3240 
// Every channel count from 33 through 63 (above cr = 32, below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3251 
// Channel counts 33 through 63 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3263 
// Channel counts 33 through 63 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3275 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3287 
// Width-3 runs for channel counts 1, 32, ..., 156, each with step 2 and step 3.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    for (size_t s = 2; s < 4; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
3302 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is now forwarded to channels(); the previous
// hard-coded channels(32) made all six iterations run the same configuration.
// Every swept channel count stays below the 163 output stride.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3315 
// Width-3 runs with qmin set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3328 
// Width-3 runs with qmax set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3341 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).input_offset(592);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3353 
// For every zero_index in [0, 3), runs with input_offset(592) over channel
// counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx != 3; ++zero_idx) {
    for (uint32_t c = 64; c <= 448; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
3368 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
3369 
3370 
3371 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run with channels equal to cr (32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(4).channels(32);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
3380 
// Channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3391 
// Channel counts 64, 160, 256, 352, 448 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3403 
// Channel counts 64, 160, 256, 352, 448 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3415 
// Every channel count from 1 through 31 (below cr = 32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3426 
// Every channel count from 33 through 63 (above cr = 32, below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3437 
// Channel counts 33 through 63 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3449 
// Channel counts 33 through 63 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3461 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3473 
// Width-3 runs for channel counts 1, 32, ..., 156, each with step 2, 3 and 4.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    for (size_t s = 2; s < 5; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(4).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3488 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is now forwarded to channels(); the previous
// hard-coded channels(32) made all six iterations run the same configuration.
// Every swept channel count stays below the 163 output stride.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3501 
// Width-3 runs with qmin set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3514 
// Width-3 runs with qmax set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3527 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).input_offset(592);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3539 
// For every zero_index in [0, 4), runs with input_offset(592) over channel
// counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx != 4; ++zero_idx) {
    for (uint32_t c = 64; c <= 448; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(4).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3554 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
3555 
3556 
3557 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run with channels equal to cr (32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(4).channels(32);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
3566 
// Channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3577 
// Channel counts 64, 160, 256, 352, 448 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3589 
// Channel counts 64, 160, 256, 352, 448 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3601 
// Every channel count from 1 through 31 (below cr = 32).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3612 
// Every channel count from 33 through 63 (above cr = 32, below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3623 
// Channel counts 33 through 63 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3635 
// Channel counts 33 through 63 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3647 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3659 
// Width-3 runs for channel counts 1, 32, ..., 156, each with step 2, 3 and 4.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    for (size_t s = 2; s < 5; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(4).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
3674 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is now forwarded to channels(); the previous
// hard-coded channels(32) made all six iterations run the same configuration.
// Every swept channel count stays below the 163 output stride.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3687 
// Width-3 runs with qmin set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3700 
// Width-3 runs with qmax set to 128, for channel counts 1, 32, ..., 156.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c < 161; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3713 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 64; c <= 448; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(4).channels(c).input_offset(592);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3725 
// For every zero_index in [0, 4), runs with input_offset(592) over channel
// counts 64, 160, 256, 352, 448.
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx != 4; ++zero_idx) {
    for (uint32_t c = 64; c <= 448; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(4).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
3740 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
3741 
3742 
3743 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Single run with channels equal to cr (32).
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(9).channels(32);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
3752 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32) {
  // Channel counts 64, 160, 256, 352, 448 - each a whole multiple of cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3763 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmin) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3775 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmax) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3787 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_lt_32) {
  // Every channel count below cr: 1 through 31.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3798 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32) {
  // Every channel count strictly between cr and 2*cr: 33 through 63.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3809 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmin) {
  // Channel counts 33 through 63 with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3821 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmax) {
  // Channel counts 33 through 63 with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3833 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel) {
  // Runs with width(3) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3845 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_step) {
  // For channel counts 1, 32, 63, 94, 125, 156: runs width(3) with every
  // step value from 2 up to kr (9) inclusive.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3860 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 32, 63, 94, 125, 156 with width(5) and
  // output_stride(163), which is larger than every channel count visited.
  // The loop variable is passed to channels() so each iteration exercises a
  // different channel count rather than repeating channels(32).
  // NOTE(review): this file is auto-generated; mirror the change in
  // tools/generate-dwconv-test.py so regeneration does not undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3873 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmin) {
  // Runs with width(3) and qmin(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3886 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmax) {
  // Runs with width(3) and qmax(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3899 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, input_offset) {
  // Runs with input_offset(592) at channel counts 64, 160, 256, 352, 448.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
3911 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, zero) {
  // Sweeps zero_index over [0, kr) = 0..8; for each value, runs channel counts
  // 64, 160, 256, 352, 448 with input_offset(592).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
3926 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
3927 
3928 
3929 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_eq_32) {
  // Single run with the channel count equal to cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester()
    .cr(32).kr(9)
    .channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
3938 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32) {
  // Channel counts 64, 160, 256, 352, 448 - each a whole multiple of cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3949 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3961 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3973 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_lt_32) {
  // Every channel count below cr: 1 through 31.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3984 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32) {
  // Every channel count strictly between cr and 2*cr: 33 through 63.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
3995 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
  // Channel counts 33 through 63 with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4007 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
  // Channel counts 33 through 63 with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4019 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel) {
  // Runs with width(3) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4031 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  // For channel counts 1, 32, 63, 94, 125, 156: runs width(3) with every
  // step value from 2 up to kr (9) inclusive.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
4046 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 32, 63, 94, 125, 156 with width(5) and
  // output_stride(163), which is larger than every channel count visited.
  // The loop variable is passed to channels() so each iteration exercises a
  // different channel count rather than repeating channels(32).
  // NOTE(review): this file is auto-generated; mirror the change in
  // tools/generate-dwconv-test.py so regeneration does not undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4059 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  // Runs with width(3) and qmin(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4072 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  // Runs with width(3) and qmax(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4085 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, input_offset) {
  // Runs with input_offset(592) at channel counts 64, 160, 256, 352, 448.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4097 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, zero) {
  // Sweeps zero_index over [0, kr) = 0..8; for each value, runs channel counts
  // 64, 160, 256, 352, 448 with input_offset(592).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
4112 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
4113 
4114 
4115 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_eq_32) {
  // Single run with the channel count equal to cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester()
    .cr(32).kr(25)
    .channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
}
4124 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32) {
  // Channel counts 64, 160, 256, 352, 448 - each a whole multiple of cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4135 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmin) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4147 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmax) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4159 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_lt_32) {
  // Every channel count below cr: 1 through 31.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4170 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32) {
  // Every channel count strictly between cr and 2*cr: 33 through 63.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4181 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmin) {
  // Channel counts 33 through 63 with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4193 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmax) {
  // Channel counts 33 through 63 with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4205 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel) {
  // Runs with width(3) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4217 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_step) {
  // For channel counts 1, 32, 63, 94, 125, 156: runs width(3) with every
  // step value from 2 up to kr (25) inclusive.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
        .cr(32).kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
4232 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 32, 63, 94, 125, 156 with width(5) and
  // output_stride(163), which is larger than every channel count visited.
  // The loop variable is passed to channels() so each iteration exercises a
  // different channel count rather than repeating channels(32).
  // NOTE(review): this file is auto-generated; mirror the change in
  // tools/generate-dwconv-test.py so regeneration does not undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4245 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmin) {
  // Runs with width(3) and qmin(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4258 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmax) {
  // Runs with width(3) and qmax(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4271 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, input_offset) {
  // Runs with input_offset(592) at channel counts 64, 160, 256, 352, 448.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
4283 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, zero) {
  // Sweeps zero_index over [0, kr) = 0..24; for each value, runs channel counts
  // 64, 160, 256, 352, 448 with input_offset(592).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(25)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
4298 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
4299 
4300 
4301 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_eq_32) {
  // Single run with the channel count equal to cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester()
    .cr(32).kr(25)
    .channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}
4310 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32) {
  // Channel counts 64, 160, 256, 352, 448 - each a whole multiple of cr (32).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4321 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4333 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
  // Multiples of cr (64, 160, 256, 352, 448 channels) with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4345 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_lt_32) {
  // Every channel count below cr: 1 through 31.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4356 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32) {
  // Every channel count strictly between cr and 2*cr: 33 through 63.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4367 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
  // Channel counts 33 through 63 with qmin(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4379 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
  // Channel counts 33 through 63 with qmax(128) set.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4391 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel) {
  // Runs with width(3) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4403 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  // For channel counts 1, 32, 63, 94, 125, 156: runs width(3) with every
  // step value from 2 up to kr (25) inclusive.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
        .cr(32).kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
4418 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Sweeps channel counts 1, 32, 63, 94, 125, 156 with width(5) and
  // output_stride(163), which is larger than every channel count visited.
  // The loop variable is passed to channels() so each iteration exercises a
  // different channel count rather than repeating channels(32).
  // NOTE(review): this file is auto-generated; mirror the change in
  // tools/generate-dwconv-test.py so regeneration does not undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4431 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  // Runs with width(3) and qmin(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4444 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  // Runs with width(3) and qmax(128) at channel counts 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4457 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, input_offset) {
  // Runs with input_offset(592) at channel counts 64, 160, 256, 352, 448.
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(25)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
4469 
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, zero) {
  // Sweeps zero_index over [0, kr) = 0..24; for each value, runs channel counts
  // 64, 160, 256, 352, 448 with input_offset(592).
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(25)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}
4484 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
4485 
4486 
4487 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
}
4496 
// Channel counts 16, 40, 64, 88 and 112 - each a whole multiple of cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4507 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4519 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4531 
// Every channel count from 1 to 7, i.e. all counts below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4542 
// Every channel count from 9 to 15, i.e. strictly between cr (8) and 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4553 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4565 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4577 
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4589 
// width(3) with step values 2..3 (up to kr = 3), for channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
4604 
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// The swept channel count is passed to the tester (rather than a fixed 8) so
// that each iteration exercises a different configuration; every swept value
// stays below the output stride of 43.
// NOTE: this file is generated - the same change belongs in the template used
// by tools/generate-dwconv-test.py, otherwise regeneration will undo it.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4617 
// width(3) with qmin(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4630 
// width(3) with qmax(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4643 
// input_offset(176) over channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4655 
// Each zero_index in 0..2 (every value below kr = 3) combined with
// input_offset(176) and channel counts 16, 40, 64, 88 and 112.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
4670 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671 
4672 
4673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
4682 
// Channel counts 16, 40, 64, 88 and 112 - each a whole multiple of cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4693 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4705 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4717 
// Every channel count from 1 to 7, i.e. all counts below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4728 
// Every channel count from 9 to 15, i.e. strictly between cr (8) and 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4739 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4751 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4763 
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4775 
// width(3) with step values 2..3 (up to kr = 3), for channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
4790 
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// The swept channel count is passed to the tester (rather than a fixed 8) so
// that each iteration exercises a different configuration; every swept value
// stays below the output stride of 43.
// NOTE: this file is generated - the same change belongs in the template used
// by tools/generate-dwconv-test.py, otherwise regeneration will undo it.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4803 
// width(3) with qmin(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4816 
// width(3) with qmax(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4829 
// input_offset(176) over channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
4841 
// Each zero_index in 0..2 (every value below kr = 3) combined with
// input_offset(176) and channel counts 16, 40, 64, 88 and 112.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
4856 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857 
4858 
4859 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(4).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
}
4868 
// Channel counts 16, 40, 64, 88 and 112 - each a whole multiple of cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4879 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4891 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4903 
// Every channel count from 1 to 7, i.e. all counts below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4914 
// Every channel count from 9 to 15, i.e. strictly between cr (8) and 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4925 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4937 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4949 
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4961 
// width(3) with step values 2..4 (up to kr = 4), for channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
4976 
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// The swept channel count is passed to the tester (rather than a fixed 8) so
// that each iteration exercises a different configuration; every swept value
// stays below the output stride of 43.
// NOTE: this file is generated - the same change belongs in the template used
// by tools/generate-dwconv-test.py, otherwise regeneration will undo it.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
4989 
// width(3) with qmin(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5002 
// width(3) with qmax(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5015 
// input_offset(176) over channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5027 
// Each zero_index in 0..3 (every value below kr = 4) combined with
// input_offset(176) and channel counts 16, 40, 64, 88 and 112.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 4; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
5042 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043 
5044 
5045 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(4).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
5054 
// Channel counts 16, 40, 64, 88 and 112 - each a whole multiple of cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5065 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5077 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5089 
// Every channel count from 1 to 7, i.e. all counts below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5100 
// Every channel count from 9 to 15, i.e. strictly between cr (8) and 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5111 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5123 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5135 
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5147 
// width(3) with step values 2..4 (up to kr = 4), for channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5162 
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// The swept channel count is passed to the tester (rather than a fixed 8) so
// that each iteration exercises a different configuration; every swept value
// stays below the output stride of 43.
// NOTE: this file is generated - the same change belongs in the template used
// by tools/generate-dwconv-test.py, otherwise regeneration will undo it.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5175 
// width(3) with qmin(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5188 
// width(3) with qmax(128) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5201 
// input_offset(176) over channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5213 
// Each zero_index in 0..3 (every value below kr = 4) combined with
// input_offset(176) and channel counts 16, 40, 64, 88 and 112.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 4; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5228 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229 
5230 
5231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
}
5240 
// Channel counts 16, 40, 64, 88 and 112 - each a whole multiple of cr (8).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5251 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5263 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5275 
// Every channel count from 1 to 7, i.e. all counts below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5286 
// Every channel count from 9 to 15, i.e. strictly between cr (8) and 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5297 
// Channel counts 9 to 15 (above cr = 8, below 2 * cr) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5309 
// Every channel count from 9 to 15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5321 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5333 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, each with every step value from 2 to 9.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
5348 
// Width-5 runs with output_stride 43 over channel counts 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(8) left it unused, so all six iterations repeated one configuration.
// NOTE(review): assumes output_stride only has to be >= channels (43 > 36) — confirm against the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5361 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5374 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5387 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5399 
// For each zero_index from 0 to 8, sweeps channel counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
5414 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415 
5416 
5417 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (8).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
5426 
// Channel counts 16, 40, 64, 88 and 112: multiples of the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5437 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5449 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5461 
// Every channel count from 1 to 7, i.e. below the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5472 
// Every channel count from 9 to 15, i.e. above the cr value 8 and below twice it.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5483 
// Every channel count from 9 to 15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5495 
// Every channel count from 9 to 15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5507 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5519 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, each with every step value from 2 to 9.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5534 
// Width-5 runs with output_stride 43 over channel counts 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(8) left it unused, so all six iterations repeated one configuration.
// NOTE(review): assumes output_stride only has to be >= channels (43 > 36) — confirm against the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5547 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5560 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5573 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5585 
// For each zero_index from 0 to 8, sweeps channel counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5600 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601 
5602 
5603 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (8).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
}
5612 
// Channel counts 16, 40, 64, 88 and 112: multiples of the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5623 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5635 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5647 
// Every channel count from 1 to 7, i.e. below the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5658 
// Every channel count from 9 to 15, i.e. above the cr value 8 and below twice it.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5669 
// Every channel count from 9 to 15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5681 
// Every channel count from 9 to 15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5693 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5705 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, each with every step value from 2 to 25.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
5720 
// Width-5 runs with output_stride 43 over channel counts 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(8) left it unused, so all six iterations repeated one configuration.
// NOTE(review): assumes output_stride only has to be >= channels (43 > 36) — confirm against the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5733 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5746 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5759 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5771 
// For each zero_index from 0 to 24, sweeps channel counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
5786 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5787 
5788 
5789 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (8).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
5798 
// Channel counts 16, 40, 64, 88 and 112: multiples of the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5809 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5821 
// Channel counts 16, 40, 64, 88 and 112 (multiples of the cr value 8) with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5833 
// Every channel count from 1 to 7, i.e. below the cr value 8.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5844 
// Every channel count from 9 to 15, i.e. above the cr value 8 and below twice it.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5855 
// Every channel count from 9 to 15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5867 
// Every channel count from 9 to 15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5879 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5891 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, each with every step value from 2 to 25.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5906 
// Width-5 runs with output_stride 43 over channel counts 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(8) left it unused, so all six iterations repeated one configuration.
// NOTE(review): assumes output_stride only has to be >= channels (43 > 36) — confirm against the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5919 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5932 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5945 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
5957 
// For each zero_index from 0 to 24, sweeps channel counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
5972 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973 
5974 
5975 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
}
5984 
// Channel counts 32, 80, 128, 176 and 224: multiples of the cr value 16.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
5995 
// Channel counts 32, 80, 128, 176 and 224 (multiples of the cr value 16) with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6007 
// Channel counts 32, 80, 128, 176 and 224 (multiples of the cr value 16) with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6019 
// Every channel count from 1 to 15, i.e. below the cr value 16.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6030 
// Every channel count from 17 to 31, i.e. above the cr value 16 and below twice it.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6041 
// Every channel count from 17 to 31 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6053 
// Every channel count from 17 to 31 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6065 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6077 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each with step values 2 and 3.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6092 
// Width-5 runs with output_stride 83 over channel counts 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(16) left it unused, so all six iterations repeated one configuration.
// NOTE(review): assumes output_stride only has to be >= channels (83 > 76) — confirm against the tester.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6105 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6118 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6131 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6143 
// For each zero_index in 0..2, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(3).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6158 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159 
6160 
6161 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 16.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester()
    .cr(16).kr(3).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
6170 
// Channel counts divisible by 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6181 
// Channel counts 32, 80, 128, 176, 224 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6193 
// Channel counts 32, 80, 128, 176, 224 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6205 
// Every channel count below 16 (1 through 15).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6216 
// Every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6227 
// Channel counts 17 through 31 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6239 
// Channel counts 17 through 31 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6251 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6263 
// Width-3 runs over channel counts 1, 16, ..., 76, each with step 2 and 3.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(3).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
6278 
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() must follow the loop variable: with a constant argument every
// iteration would exercise the identical configuration. 83 exceeds the largest
// swept channel count (76); presumably the stride must be >= channels.
// NOTE(review): this file is auto-generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6291 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6304 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6317 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(3).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6329 
// For each zero_index in 0..2, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(3).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
6344 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6345 
6346 
6347 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 16.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester()
    .cr(16).kr(4).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
}
6356 
// Channel counts divisible by 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6367 
// Channel counts 32, 80, 128, 176, 224 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6379 
// Channel counts 32, 80, 128, 176, 224 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6391 
// Every channel count below 16 (1 through 15).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6402 
// Every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6413 
// Channel counts 17 through 31 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6425 
// Channel counts 17 through 31 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6437 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6449 
// Width-3 runs over channel counts 1, 16, ..., 76, each with step 2, 3 and 4.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(4).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6464 
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() must follow the loop variable: with a constant argument every
// iteration would exercise the identical configuration. 83 exceeds the largest
// swept channel count (76); presumably the stride must be >= channels.
// NOTE(review): this file is auto-generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6477 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6490 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6503 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6515 
// For each zero_index in 0..3, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 4; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(4).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6530 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6531 
6532 
6533 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 16.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester()
    .cr(16).kr(4).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
6542 
// Channel counts divisible by 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6553 
// Channel counts 32, 80, 128, 176, 224 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6565 
// Channel counts 32, 80, 128, 176, 224 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6577 
// Every channel count below 16 (1 through 15).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6588 
// Every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6599 
// Channel counts 17 through 31 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6611 
// Channel counts 17 through 31 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6623 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6635 
// Width-3 runs over channel counts 1, 16, ..., 76, each with step 2, 3 and 4.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(4).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
6650 
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() must follow the loop variable: with a constant argument every
// iteration would exercise the identical configuration. 83 exceeds the largest
// swept channel count (76); presumably the stride must be >= channels.
// NOTE(review): this file is auto-generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6663 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6676 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6689 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(4).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6701 
// For each zero_index in 0..3, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 4; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(4).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
6716 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6717 
6718 
6719 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 16.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester()
    .cr(16).kr(9).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
}
6728 
// Channel counts divisible by 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6739 
// Channel counts 32, 80, 128, 176, 224 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6751 
// Channel counts 32, 80, 128, 176, 224 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6763 
// Every channel count below 16 (1 through 15).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6774 
// Every channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6785 
// Channel counts 17 through 31 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6797 
// Channel counts 17 through 31 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6809 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6821 
// Width-3 runs over channel counts 1, 16, ..., 76, each with steps 2 through 9.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6836 
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() must follow the loop variable: with a constant argument every
// iteration would exercise the identical configuration. 83 exceeds the largest
// swept channel count (76); presumably the stride must be >= channels.
// NOTE(review): this file is auto-generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6849 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6862 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6875 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
6887 
// For each zero_index in 0..8, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
6902 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6903 
6904 
6905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 16.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester()
    .cr(16).kr(9).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
6914 
// Channel counts divisible by 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6925 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6937 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6949 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 15, i.e. strictly below cr (16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6960 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 17 through 31, i.e. strictly between cr (16) and 2*cr.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6971 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6983 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
6995 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7007 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // For channel counts 1, 16, 31, 46, 61, 76, try every step from 2 through 9
  // (the kr value) on width-3 runs.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7022 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width-5 runs with output_stride 83, swept over channel counts
  // 1, 16, 31, 46, 61, 76. The loop variable is passed to channels(); it used
  // to be ignored in favour of a hard-coded 16, so all six iterations ran the
  // same configuration. Every swept count stays below the stride of 83.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // apply the same change to the generator so it survives regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7035 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7048 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7061 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7073 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0..8 (one per kr entry), sweep channel counts
  // 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7088 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7089 
7090 
7091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (16).
  DWConvMicrokernelTester().cr(16).kr(25).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
}
7100 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224: each a multiple of cr (16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7111 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7123 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7135 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 15, i.e. strictly below cr (16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7146 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 17 through 31, i.e. strictly between cr (16) and 2*cr.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7157 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7169 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7181 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7193 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // For channel counts 1, 16, 31, 46, 61, 76, try every step from 2 through 25
  // (the kr value) on width-3 runs.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
7208 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width-5 runs with output_stride 83, swept over channel counts
  // 1, 16, 31, 46, 61, 76. The loop variable is passed to channels(); it used
  // to be ignored in favour of a hard-coded 16, so all six iterations ran the
  // same configuration. Every swept count stays below the stride of 83.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // apply the same change to the generator so it survives regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7221 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7234 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7247 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7259 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0..24 (one per kr entry), sweep channel counts
  // 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
7274 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7275 
7276 
7277 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (16).
  DWConvMicrokernelTester().cr(16).kr(25).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
7286 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224: each a multiple of cr (16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7297 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7309 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7321 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 15, i.e. strictly below cr (16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7332 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 17 through 31, i.e. strictly between cr (16) and 2*cr.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7343 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7355 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7367 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7379 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // For channel counts 1, 16, 31, 46, 61, 76, try every step from 2 through 25
  // (the kr value) on width-3 runs.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7394 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width-5 runs with output_stride 83, swept over channel counts
  // 1, 16, 31, 46, 61, 76. The loop variable is passed to channels(); it used
  // to be ignored in favour of a hard-coded 16, so all six iterations ran the
  // same configuration. Every swept count stays below the stride of 83.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // apply the same change to the generator so it survives regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7407 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7420 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7433 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7445 
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0..24 (one per kr entry), sweep channel counts
  // 32, 80, 128, 176, 224 with input_offset fixed at 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7460 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7461 
7462 
7463 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (32).
  DWConvMicrokernelTester().cr(32).kr(3).channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
}
7472 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7483 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmin set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7495 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmax set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7507 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, i.e. strictly below cr (32).
  for (uint32_t num_channels = 1; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7518 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, i.e. strictly between cr (32) and 2*cr.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7529 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 33 through 63 with qmin set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7541 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 33 through 63 with qmax set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7553 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7565 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // For channel counts 1, 32, 63, 94, 125, 156, try steps 2 and 3
  // (up to the kr value) on width-3 runs.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 3; pixel_step++) {
      DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
7580 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width-5 runs with output_stride 163, swept over channel counts
  // 1, 32, 63, 94, 125, 156. The loop variable is passed to channels(); it used
  // to be ignored in favour of a hard-coded 32, so all six iterations ran the
  // same configuration. Every swept count stays below the stride of 163.
  // NOTE(review): this file is generated by tools/generate-dwconv-test.py;
  // apply the same change to the generator so it survives regeneration.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7593 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 32, 63, 94, 125, 156 with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7606 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width-3 runs over channel counts 1, 32, 63, 94, 125, 156 with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7619 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 with input_offset fixed at 592.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).input_offset(592)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7631 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0..2 (one per kr entry), sweep channel counts
  // 64, 160, 256, 352, 448 with input_offset fixed at 592.
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).input_offset(592).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
7646 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7647 
7648 
7649 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (32).
  DWConvMicrokernelTester().cr(32).kr(3).channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
7658 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7669 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmin set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7681 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmax set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7693 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, i.e. strictly below cr (32).
  for (uint32_t num_channels = 1; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7704 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, i.e. strictly between cr (32) and 2*cr.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7715 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 33 through 63 with qmin set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7727 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7739 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7751 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Cross channel counts 1, 32, ..., 156 with step values 2 through 3 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(32).kr(3).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7766 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Sweep channel counts 1, 32, 63, 94, 125, 156 at width(5) with
  // output_stride(163), which exceeds every channel count in the sweep.
  // The loop used to pass the constant .channels(32), leaving the loop
  // variable unused so that all six iterations ran the same configuration.
  // NOTE(review): the file header marks this file as generated by
  // tools/generate-dwconv-test.py; the same change presumably belongs in the
  // generator template as well - confirm.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7779 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmin(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7792 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmax(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7805 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 with input_offset(592).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(3).channels(c).input_offset(592).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
7817 
TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0 through 2, sweep channel counts 64, 160, ..., 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester()
          .cr(32).kr(3).channels(c)
          .input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
7832 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7833 
7834 
7835 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (both 32).
  DWConvMicrokernelTester().cr(32).kr(4).channels(32).Test(
      xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
}
7844 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7855 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmin(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7867 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmax(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7879 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, all below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7890 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, all above cr = 32.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7901 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmin(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7913 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7925 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7937 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Cross channel counts 1, 32, ..., 156 with step values 2 through 4 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
7952 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Sweep channel counts 1, 32, 63, 94, 125, 156 at width(5) with
  // output_stride(163), which exceeds every channel count in the sweep.
  // The loop used to pass the constant .channels(32), leaving the loop
  // variable unused so that all six iterations ran the same configuration.
  // NOTE(review): the file header marks this file as generated by
  // tools/generate-dwconv-test.py; the same change presumably belongs in the
  // generator template as well - confirm.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7965 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmin(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7978 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmax(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
7991 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 with input_offset(592).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).input_offset(592).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8003 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0 through 3, sweep channel counts 64, 160, ..., 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester()
          .cr(32).kr(4).channels(c)
          .input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
8018 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8019 
8020 
8021 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (both 32).
  DWConvMicrokernelTester().cr(32).kr(4).channels(32).Test(
      xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
8030 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8041 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmin(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8053 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmax(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8065 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, all below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8076 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, all above cr = 32.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8087 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmin(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8099 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8111 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8123 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Cross channel counts 1, 32, ..., 156 with step values 2 through 4 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8138 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Sweep channel counts 1, 32, 63, 94, 125, 156 at width(5) with
  // output_stride(163), which exceeds every channel count in the sweep.
  // The loop used to pass the constant .channels(32), leaving the loop
  // variable unused so that all six iterations ran the same configuration.
  // NOTE(review): the file header marks this file as generated by
  // tools/generate-dwconv-test.py; the same change presumably belongs in the
  // generator template as well - confirm.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8151 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmin(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8164 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmax(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8177 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 with input_offset(592).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).input_offset(592).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8189 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0 through 3, sweep channel counts 64, 160, ..., 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester()
          .cr(32).kr(4).channels(c)
          .input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8204 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8205 
8206 
8207 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (both 32).
  DWConvMicrokernelTester().cr(32).kr(9).channels(32).Test(
      xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
}
8216 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8227 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmin(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8239 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmax(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8251 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, all below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8262 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, all above cr = 32.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8273 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmin(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8285 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8297 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8309 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Cross channel counts 1, 32, ..., 156 with step values 2 through 9 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
8324 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Sweep channel counts 1, 32, 63, 94, 125, 156 at width(5) with
  // output_stride(163), which exceeds every channel count in the sweep.
  // The loop used to pass the constant .channels(32), leaving the loop
  // variable unused so that all six iterations ran the same configuration.
  // NOTE(review): the file header marks this file as generated by
  // tools/generate-dwconv-test.py; the same change presumably belongs in the
  // generator template as well - confirm.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8337 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmin(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8350 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3) with qmax(128).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8363 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 with input_offset(592).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8375 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero_index 0 through 8, sweep channel counts 64, 160, ..., 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester()
          .cr(32).kr(9).channels(c)
          .input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
8390 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8391 
8392 
8393 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with channels equal to cr (both 32).
  DWConvMicrokernelTester().cr(32).kr(9).channels(32).Test(
      xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
8402 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8413 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmin(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8425 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, ..., 448 (all multiples of 32) with qmax(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8437 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, all below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8448 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, all above cr = 32.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8459 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmin(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8471 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63 with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8483 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 1, 32, 63, 94, 125, 156 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).Test(
        xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8495 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Cross channel counts 1, 32, ..., 156 with step values 2 through 9 at width(3).
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s).Test(
          xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8510 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Sweep channel counts 1, 32, 63, 94, 125, 156 at width(5) with
  // output_stride(163), which exceeds every channel count in the sweep.
  // The loop used to pass the constant .channels(32), leaving the loop
  // variable unused so that all six iterations ran the same configuration.
  // NOTE(review): the file header marks this file as generated by
  // tools/generate-dwconv-test.py; the same change presumably belongs in the
  // generator template as well - confirm.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8523 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmin set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8536 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmax set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8549 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Input offset of 592 on channel counts 64, 160, 256, 352, 448 (all multiples of cr = 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).input_offset(592)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8561 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero index 0..8 (one per kr = 9 position), run channel counts
  // 64..448 in strides of 96 with an input offset of 592.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8576 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8577 
8578 
8579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with the channel count equal to cr (32).
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
}
8588 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 -- all multiples of cr = 32.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8599 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Multiples of cr = 32 (64..448, stride 96), each with qmin set to 128.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8611 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Multiples of cr = 32 (64..448, stride 96), each with qmax set to 128.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8623 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, i.e. strictly below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8634 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, i.e. between cr = 32 and 2 * cr.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8645 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, each with qmin set to 128.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8657 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, each with qmax set to 128.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8669 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3; channel counts 1, 32, 63, 94, 125, 156 (stride 31 up to 160).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8681 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3; for each channel count (1..160, stride 31) try every step from 2 through kr = 25.
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).width(3).step(s)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
8696 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width 5 with an output stride of 163, swept over channel counts 1, 32, 63, 94, 125, 156.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      // Feed the loop variable to channels() so each iteration covers a different
      // channel count; the largest value visited (156) stays below the stride of 163.
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8709 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmin set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8722 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmax set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8735 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Input offset of 592 on channel counts 64, 160, 256, 352, 448 (all multiples of cr = 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).input_offset(592)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}
8747 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero index 0..24 (one per kr = 25 position), run channel counts
  // 64..448 in strides of 96 with an input offset of 592.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
8762 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8763 
8764 
8765 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  // Single run with the channel count equal to cr (32).
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
}
8774 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  // Channel counts 64, 160, 256, 352, 448 -- all multiples of cr = 32.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8785 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Multiples of cr = 32 (64..448, stride 96), each with qmin set to 128.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8797 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Multiples of cr = 32 (64..448, stride 96), each with qmax set to 128.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8809 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 1 through 31, i.e. strictly below cr = 32.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8820 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, i.e. between cr = 32 and 2 * cr.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8831 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, each with qmin set to 128.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8843 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Every channel count from 33 through 63, each with qmax set to 128.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8855 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3; channel counts 1, 32, 63, 94, 125, 156 (stride 31 up to 160).
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8867 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3; for each channel count (1..160, stride 31) try every step from 2 through kr = 25.
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).width(3).step(s)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8882 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  // Width 5 with an output stride of 163, swept over channel counts 1, 32, 63, 94, 125, 156.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      // Feed the loop variable to channels() so each iteration covers a different
      // channel count; the largest value visited (156) stays below the stride of 163.
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8895 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmin set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8908 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  // Width fixed at 3 and qmax set to 128; channel counts 1..160 in strides of 31.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8921 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  // Input offset of 592 on channel counts 64, 160, 256, 352, 448 (all multiples of cr = 32).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).input_offset(592)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
8933 
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, zero) {
  TEST_REQUIRES_X86_FMA3;
  // For each zero index 0..24 (one per kr = 25 position), run channel counts
  // 64..448 in strides of 96 with an input offset of 592.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}
8948 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8949