// xref: /aosp_15_r20/external/XNNPACK/test/qu8-dwconv-minmax-rndnu.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-dwconv-minmax-rndnu.yaml
//   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with exactly 8 channels, matching the cr(8) setting.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
    xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
32 
// Sweeps channel counts 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
43 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
55 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
67 
// Covers every channel count from 1 through 7, i.e. fewer than 8.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
78 
// Covers every channel count from 9 through 15, i.e. strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
89 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
101 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
113 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
125 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36, each repeated
// with step values 2 through 9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
140 
// Width-5 runs with output_stride set to 43.
// The channel count follows the loop variable (1, 8, 15, 22, 29, 36).
// Previously every iteration hard-coded 8 channels, so the loop repeated one
// identical configuration six times. All swept counts stay below the stride.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
153 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
166 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
179 
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
193 
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
207 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
219 
// For each zero_index 0 through 8, runs channel counts 16, 40, 64, 88 and 112
// with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c)
        .input_offset(176).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
234 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
235 
236 
237 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with exactly 8 channels, matching the cr(8) setting.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
    xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
246 
// Sweeps channel counts 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
257 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
269 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
281 
// Covers every channel count from 1 through 7, i.e. fewer than 8.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
292 
// Covers every channel count from 9 through 15, i.e. strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
303 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
315 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
327 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
339 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36, each repeated
// with step values 2 through 9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
354 
// Width-5 runs with output_stride set to 43.
// The channel count follows the loop variable (1, 8, 15, 22, 29, 36).
// Previously every iteration hard-coded 8 channels, so the loop repeated one
// identical configuration six times. All swept counts stay below the stride.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
367 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
380 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
393 
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
407 
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
421 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
433 
// For each zero_index 0 through 8, runs channel counts 16, 40, 64, 88 and 112
// with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c)
        .input_offset(176).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
448 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
449 
450 
451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with exactly 8 channels, matching the cr(8) setting.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
    xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
460 
// Sweeps channel counts 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
471 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
483 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
495 
// Covers every channel count from 1 through 7, i.e. fewer than 8.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
506 
// Covers every channel count from 9 through 15, i.e. strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
517 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
529 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
541 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
553 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36, each repeated
// with step values 2 through 25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
568 
// Width-5 runs with output_stride set to 43.
// The channel count follows the loop variable (1, 8, 15, 22, 29, 36).
// Previously every iteration hard-coded 8 channels, so the loop repeated one
// identical configuration six times. All swept counts stay below the stride.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
581 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
594 
// Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
607 
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
621 
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255,
// for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
635 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
647 
// For each zero_index 0 through 24, runs channel counts 16, 40, 64, 88 and 112
// with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c)
        .input_offset(176).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
662 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
663 
664 
665 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with exactly 8 channels, matching the cr(8) setting.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
    xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
674 
// Sweeps channel counts 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
685 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
697 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
709 
// Covers every channel count from 1 through 7, i.e. fewer than 8.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
720 
// Covers every channel count from 9 through 15, i.e. strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
731 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
743 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 to 15 with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
755 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3 with channels = 1, 8, ..., 36 (step 7).
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
767 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3; each channel count (1, 8, ..., 36) is paired with every step from 2 to kr = 25.
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
782 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // width = 5 and output_stride = 43, swept over channels = 1, 8, ..., 36.
  // The loop variable is forwarded to channels(); with the former hard-coded
  // channels(8) every iteration re-ran one identical configuration.
  // The stride (43) stays above the largest channel count in the sweep (36).
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
795 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmin = 128, channels = 1, 8, ..., 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
808 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmax = 128, channels = 1, 8, ..., 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
821 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 255 paired with kernel_zero_point = 0; width = 3, channels = 1, 8, ..., 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
835 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 0 paired with kernel_zero_point = 255; width = 3, channels = 1, 8, ..., 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
849 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 16, 40, ..., 112 with input_offset set to 176.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
861 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For every zero_index in [0, 25), sweep channels = 16, 40, ..., 112 with input_offset = 176.
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
876 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
877 
878 
879 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: channels equal to cr = 16.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
888 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
899 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
911 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
923 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 to 15, i.e. all values below cr = 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
934 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 to 31, i.e. strictly between cr = 16 and 2 * cr.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
945 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
957 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
969 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3 with channels = 1, 16, ..., 76 (step 15).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
981 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3; each channel count (1, 16, ..., 76) is paired with every step from 2 to kr = 9.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
996 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // width = 5 and output_stride = 83, swept over channels = 1, 16, ..., 76.
  // The loop variable is forwarded to channels(); with the former hard-coded
  // channels(16) every iteration re-ran one identical configuration.
  // The stride (83) stays above the largest channel count in the sweep (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1009 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmin = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1022 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmax = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1035 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 255 paired with kernel_zero_point = 0; width = 3, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1049 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 0 paired with kernel_zero_point = 255; width = 3, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1063 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1075 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For every zero_index in [0, 9), sweep channels = 32, 80, ..., 224 with input_offset = 304.
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1090 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1091 
1092 
1093 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: channels equal to cr = 16.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1102 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1113 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1125 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1137 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 to 15, i.e. all values below cr = 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1148 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 to 31, i.e. strictly between cr = 16 and 2 * cr.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1159 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1171 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1183 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3 with channels = 1, 16, ..., 76 (step 15).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1195 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3; each channel count (1, 16, ..., 76) is paired with every step from 2 to kr = 9.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1210 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // width = 5 and output_stride = 83, swept over channels = 1, 16, ..., 76.
  // The loop variable is forwarded to channels(); with the former hard-coded
  // channels(16) every iteration re-ran one identical configuration.
  // The stride (83) stays above the largest channel count in the sweep (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1223 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmin = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1236 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmax = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1249 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 255 paired with kernel_zero_point = 0; width = 3, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1263 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  // input_zero_point = 0 paired with kernel_zero_point = 255; width = 3, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1277 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1289 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For every zero_index in [0, 9), sweep channels = 32, 80, ..., 224 with input_offset = 304.
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1304 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1305 
1306 
1307 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: channels equal to cr = 16.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1316 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1327 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1339 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // channels = 32, 80, ..., 224 (multiples of cr = 16) with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1351 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 to 15, i.e. all values below cr = 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1362 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 to 31, i.e. strictly between cr = 16 and 2 * cr.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1373 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1385 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 to 31 with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1397 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3 with channels = 1, 16, ..., 76 (step 15).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1409 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3; each channel count (1, 16, ..., 76) is paired with every step from 2 to kr = 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1424 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // width = 5 and output_stride = 83, swept over channels = 1, 16, ..., 76.
  // The loop variable is forwarded to channels(); with the former hard-coded
  // channels(16) every iteration re-ran one identical configuration.
  // The stride (83) stays above the largest channel count in the sweep (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1437 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmin = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1450 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // width = 3, qmax = 128, channels = 1, 16, ..., 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1463 
// Width-3 run with input_zero_point = 255 and kernel_zero_point = 0 for
// channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1477 
// Width-3 run with input_zero_point = 0 and kernel_zero_point = 255 for
// channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1491 
// Runs with input_offset = 304 for channels = 32, 80, 128, 176, 224
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1503 
// For each zero_index in 0..24, runs with input_offset = 304 over
// channels = 32, 80, 128, 176, 224 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1518 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1519 
1520 
1521 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: cr=16, kr=25, channels=16.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(25)
    .channels(16)
    .Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1530 
// Runs channels = 32, 80, 128, 176, 224 (all multiples of 16) with cr=16, kr=25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1541 
// Runs channels = 32, 80, 128, 176, 224 with qmin set to 128 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1553 
// Runs channels = 32, 80, 128, 176, 224 with qmax set to 128 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1565 
// Runs every channel count from 1 through 15 with cr=16, kr=25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1576 
// Runs every channel count from 17 through 31 with cr=16, kr=25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1587 
// Runs every channel count from 17 through 31 with qmin set to 128
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1599 
// Runs every channel count from 17 through 31 with qmax set to 128
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1611 
// Width-3 run for channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1623 
// Sweeps channels = 1, 16, 31, 46, 61, 76 and step = 2..25 at width 3
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1638 
// Multi-pixel run (width 5) with an explicit output stride of 83, swept over
// channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      // Pass the loop variable (not a fixed 16) so each iteration covers a
      // distinct channel count; every swept value (max 76) stays below the
      // output stride of 83.
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1651 
// Width-3 run with qmin set to 128 for channels = 1, 16, 31, 46, 61, 76
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1664 
// Width-3 run with qmax set to 128 for channels = 1, 16, 31, 46, 61, 76
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1677 
// Width-3 run with input_zero_point = 255 and kernel_zero_point = 0 for
// channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1691 
// Width-3 run with input_zero_point = 0 and kernel_zero_point = 255 for
// channels = 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1705 
// Runs with input_offset = 304 for channels = 32, 80, 128, 176, 224
// (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1717 
// For each zero_index in 0..24, runs with input_offset = 304 over
// channels = 32, 80, 128, 176, 224 (cr=16, kr=25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1732 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1733 
1734 
1735 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: cr=24, kr=9, channels=24.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(9)
    .channels(24)
    .Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1744 
// Runs channels = 48, 120, 192, 264, 336 (all multiples of 24) with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1755 
// Runs channels = 48, 120, 192, 264, 336 with qmin set to 128 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1767 
// Runs channels = 48, 120, 192, 264, 336 with qmax set to 128 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1779 
// Runs every channel count from 1 through 23 with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1790 
// Runs every channel count from 25 through 47 with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1801 
// Runs every channel count from 25 through 47 with qmin set to 128
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1813 
// Runs every channel count from 25 through 47 with qmax set to 128
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1825 
// Width-3 run for channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1837 
// Sweeps channels = 1, 24, 47, 70, 93, 116 and step = 2..9 at width 3
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(24).kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1852 
// Multi-pixel run (width 5) with an explicit output stride of 127, swept over
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      // Pass the loop variable (not a fixed 24) so each iteration covers a
      // distinct channel count; every swept value (max 116) stays below the
      // output stride of 127.
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1865 
// Width-3 run with qmin set to 128 for channels = 1, 24, 47, 70, 93, 116
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1878 
// Width-3 run with qmax set to 128 for channels = 1, 24, 47, 70, 93, 116
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1891 
// Width-3 run with input_zero_point = 255 and kernel_zero_point = 0 for
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1905 
// Width-3 run with input_zero_point = 0 and kernel_zero_point = 255 for
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1919 
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1931 
// For each zero_index in 0..8, runs with input_offset = 464 over
// channels = 48, 120, 192, 264, 336 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(9)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1946 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1947 
1948 
1949 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: cr=24, kr=9, channels=24.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(9)
    .channels(24)
    .Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1958 
// Runs channels = 48, 120, 192, 264, 336 (all multiples of 24) with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1969 
// Runs channels = 48, 120, 192, 264, 336 with qmin set to 128 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1981 
// Runs channels = 48, 120, 192, 264, 336 with qmax set to 128 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1993 
// Runs every channel count from 1 through 23 with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2004 
// Runs every channel count from 25 through 47 with cr=24, kr=9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2015 
// Runs every channel count from 25 through 47 with qmin set to 128
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2027 
// Runs every channel count from 25 through 47 with qmax set to 128
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2039 
// Width-3 run for channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2051 
// Sweeps channels = 1, 24, 47, 70, 93, 116 and step = 2..9 at width 3
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(24).kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
2066 
// Multi-pixel run (width 5) with an explicit output stride of 127, swept over
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      // Pass the loop variable (not a fixed 24) so each iteration covers a
      // distinct channel count; every swept value (max 116) stays below the
      // output stride of 127.
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2079 
// Width-3 run with qmin set to 128 for channels = 1, 24, 47, 70, 93, 116
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2092 
// Width-3 run with qmax set to 128 for channels = 1, 24, 47, 70, 93, 116
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2105 
// Width-3 run with input_zero_point = 255 and kernel_zero_point = 0 for
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2119 
// Width-3 run with input_zero_point = 0 and kernel_zero_point = 255 for
// channels = 1, 24, 47, 70, 93, 116 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2133 
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336
// (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2145 
// For each zero_index in 0..8, runs with input_offset = 464 over
// channels = 48, 120, 192, 264, 336 (cr=24, kr=9).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(9)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
2160 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2161 
2162 
2163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: cr=24, kr=25, channels=24.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(25)
    .channels(24)
    .Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
2172 
// Sweeps channels over 48, 120, 192, 264 and 336 (multiples of cr = 24).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2183 
// Sweeps channels over 48, 120, 192, 264 and 336 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2195 
// Sweeps channels over 48, 120, 192, 264 and 336 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2207 
// Covers every channel count from 1 to 23, i.e. all values below cr = 24.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2218 
// Covers every channel count from 25 to 47, i.e. strictly between cr and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2229 
// Covers every channel count from 25 to 47 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2241 
// Covers every channel count from 25 to 47 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2253 
// Runs with width 3 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2265 
// For channels 1, 24, 47, 70, 93 and 116, runs width 3 with every step
// value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(24).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2280 
// Runs with width 5 and output_stride 127 for channels 1, 24, 47, 70, 93
// and 116. The stride exceeds every channel count in the sweep.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a fixed channels(24) the six iterations
// would all run the same configuration.
// NOTE(review): this file is auto-generated (tools/generate-dwconv-test.py);
// the generator template needs the same change or it will be overwritten.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2293 
// Runs with width 3 and qmin 128 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2306 
// Runs with width 3 and qmax 128 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2319 
// Runs with width 3, input_zero_point 255 and kernel_zero_point 0 for
// channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2333 
// Runs with width 3, input_zero_point 0 and kernel_zero_point 255 for
// channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2347 
// Runs with input_offset 464 for channels 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .input_offset(464)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2359 
// For every zero_index in [0, 25) runs channels 48, 120, 192, 264 and 336
// with input_offset 464.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25)
        .channels(c)
        .input_offset(464)
        .zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2374 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2375 
2376 
2377 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels equal to cr (24).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(25)
    .channels(24)
    .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
2386 
// Sweeps channels over 48, 120, 192, 264 and 336 (multiples of cr = 24).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2397 
// Sweeps channels over 48, 120, 192, 264 and 336 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2409 
// Sweeps channels over 48, 120, 192, 264 and 336 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2421 
// Covers every channel count from 1 to 23, i.e. all values below cr = 24.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2432 
// Covers every channel count from 25 to 47, i.e. strictly between cr and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2443 
// Covers every channel count from 25 to 47 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2455 
// Covers every channel count from 25 to 47 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2467 
// Runs with width 3 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2479 
// For channels 1, 24, 47, 70, 93 and 116, runs width 3 with every step
// value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(24).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2494 
// Runs with width 5 and output_stride 127 for channels 1, 24, 47, 70, 93
// and 116. The stride exceeds every channel count in the sweep.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a fixed channels(24) the six iterations
// would all run the same configuration.
// NOTE(review): this file is auto-generated (tools/generate-dwconv-test.py);
// the generator template needs the same change or it will be overwritten.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2507 
// Runs with width 3 and qmin 128 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2520 
// Runs with width 3 and qmax 128 for channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2533 
// Runs with width 3, input_zero_point 255 and kernel_zero_point 0 for
// channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2547 
// Runs with width 3, input_zero_point 0 and kernel_zero_point 255 for
// channels 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2561 
// Runs with input_offset 464 for channels 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25)
      .channels(c)
      .input_offset(464)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2573 
// For every zero_index in [0, 25) runs channels 48, 120, 192, 264 and 336
// with input_offset 464.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25)
        .channels(c)
        .input_offset(464)
        .zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2588 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2589 
2590 
2591 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels equal to cr (32).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(32).kr(9)
    .channels(32)
    .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
2600 
// Sweeps channels over 64, 160, 256, 352 and 448 (multiples of cr = 32).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2611 
// Sweeps channels over 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2623 
// Sweeps channels over 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2635 
// Covers every channel count from 1 to 31, i.e. all values below cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2646 
// Covers every channel count from 33 to 63, i.e. strictly between cr and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2657 
// Covers every channel count from 33 to 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2669 
// Covers every channel count from 33 to 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2681 
// Runs with width 3 for channels 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2693 
// For channels 1, 32, 63, 94, 125 and 156, runs width 3 with every step
// value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester().cr(32).kr(9)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2708 
// Runs with width 5 and output_stride 163 for channels 1, 32, 63, 94, 125
// and 156. The stride exceeds every channel count in the sweep.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a fixed channels(32) the six iterations
// would all run the same configuration.
// NOTE(review): this file is auto-generated (tools/generate-dwconv-test.py);
// the generator template needs the same change or it will be overwritten.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2721 
// Runs with width 3 and qmin 128 for channels 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2734 
// Runs with width 3 and qmax 128 for channels 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2747 
// Runs with width 3, input_zero_point 255 and kernel_zero_point 0 for
// channels 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2761 
// Runs with width 3, input_zero_point 0 and kernel_zero_point 255 for
// channels 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2775 
// Runs with input_offset 592 for channels 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .input_offset(592)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2787 
// For every zero_index in [0, 9) runs channels 64, 160, 256, 352 and 448
// with input_offset 592.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(9)
        .channels(c)
        .input_offset(592)
        .zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2802 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2803 
2804 
2805 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels equal to cr (32).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(32).kr(9)
    .channels(32)
    .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
2814 
// Sweeps channels over 64, 160, 256, 352 and 448 (multiples of cr = 32).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2825 
// Sweeps channels over 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2837 
// Sweeps channels over 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2849 
// Covers every channel count from 1 to 31, i.e. all values below cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2860 
// Covers every channel count from 33 to 63, i.e. strictly between cr and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2871 
// Covers every channel count from 33 to 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2883 
// up32x9 NEON MUL16 kernel: channel counts 33..63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2895 
// up32x9 NEON MUL16 kernel: width of 3 output pixels, for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2907 
// up32x9 NEON MUL16 kernel: width of 3 with every step value from 2 up to
// kr = 9, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
2922 
// up32x9 NEON MUL16 kernel: width of 5 output pixels with an explicit output
// stride of 163, for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a hard-coded channels(32) the loop only
// repeated one configuration. The largest channel count (156) is below the
// output stride (163).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2935 
// up32x9 NEON MUL16 kernel: width of 3 with qmin set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2948 
// up32x9 NEON MUL16 kernel: width of 3 with qmax set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2961 
// up32x9 NEON MUL16 kernel: width of 3 with the input zero point at 255 and
// the kernel zero point at 0, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2975 
// up32x9 NEON MUL16 kernel: width of 3 with the input zero point at 0 and
// the kernel zero point at 255, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
2989 
// up32x9 NEON MUL16 kernel: input offset of 592, for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3001 
// up32x9 NEON MUL16 kernel: input offset of 592 combined with every zero
// index from 0 to kr - 1 (0..8), for channel counts 64, 160, 256, 352, 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3016 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3017 
3018 
3019 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// up32x25 NEON MUL8 kernel: channel count equal to cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3028 
// up32x25 NEON MUL8 kernel: channel counts that are whole multiples of
// cr = 32 (64, 160, 256, 352, 448).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3039 
// up32x25 NEON MUL8 kernel: channel counts 64, 160, 256, 352 and 448
// (multiples of cr = 32) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3051 
// up32x25 NEON MUL8 kernel: channel counts 64, 160, 256, 352 and 448
// (multiples of cr = 32) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3063 
// up32x25 NEON MUL8 kernel: every channel count from 1 to 31, i.e. below
// cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3074 
// up32x25 NEON MUL8 kernel: every channel count from 33 to 63, i.e. strictly
// between cr = 32 and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3085 
// up32x25 NEON MUL8 kernel: channel counts 33..63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3097 
// up32x25 NEON MUL8 kernel: channel counts 33..63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3109 
// up32x25 NEON MUL8 kernel: width of 3 output pixels, for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3121 
// up32x25 NEON MUL8 kernel: width of 3 with every step value from 2 up to
// kr = 25, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3136 
// up32x25 NEON MUL8 kernel: width of 5 output pixels with an explicit output
// stride of 163, for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a hard-coded channels(32) the loop only
// repeated one configuration. The largest channel count (156) is below the
// output stride (163).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3149 
// up32x25 NEON MUL8 kernel: width of 3 with qmin set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3162 
// up32x25 NEON MUL8 kernel: width of 3 with qmax set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3175 
// up32x25 NEON MUL8 kernel: width of 3 with the input zero point at 255 and
// the kernel zero point at 0, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3189 
// up32x25 NEON MUL8 kernel: width of 3 with the input zero point at 0 and
// the kernel zero point at 255, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3203 
// up32x25 NEON MUL8 kernel: input offset of 592, for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3215 
// up32x25 NEON MUL8 kernel: input offset of 592 combined with every zero
// index from 0 to kr - 1 (0..24), for channel counts 64, 160, 256, 352, 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3230 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3231 
3232 
3233 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// up32x25 NEON MUL16 kernel: channel count equal to cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3242 
// up32x25 NEON MUL16 kernel: channel counts that are whole multiples of
// cr = 32 (64, 160, 256, 352, 448).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3253 
// up32x25 NEON MUL16 kernel: channel counts 64, 160, 256, 352 and 448
// (multiples of cr = 32) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3265 
// up32x25 NEON MUL16 kernel: channel counts 64, 160, 256, 352 and 448
// (multiples of cr = 32) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3277 
// up32x25 NEON MUL16 kernel: every channel count from 1 to 31, i.e. below
// cr = 32.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3288 
// up32x25 NEON MUL16 kernel: every channel count from 33 to 63, i.e. strictly
// between cr = 32 and 2 * cr.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3299 
// up32x25 NEON MUL16 kernel: channel counts 33..63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3311 
// up32x25 NEON MUL16 kernel: channel counts 33..63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3323 
// up32x25 NEON MUL16 kernel: width of 3 output pixels, for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3335 
// up32x25 NEON MUL16 kernel: width of 3 with every step value from 2 up to
// kr = 25, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3350 
// up32x25 NEON MUL16 kernel: width of 5 output pixels with an explicit output
// stride of 163, for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is passed to channels() so that each iteration covers a
// different channel count; with a hard-coded channels(32) the loop only
// repeated one configuration. The largest channel count (156) is below the
// output stride (163).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3363 
// up32x25 NEON MUL16 kernel: width of 3 with qmin set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3376 
// up32x25 NEON MUL16 kernel: width of 3 with qmax set to 128, for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3389 
// up32x25 NEON MUL16 kernel: width of 3 with the input zero point at 255 and
// the kernel zero point at 0, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3403 
// up32x25 NEON MUL16 kernel: width of 3 with the input zero point at 0 and
// the kernel zero point at 255, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3417 
// up32x25 NEON MUL16 kernel: input offset of 592, for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3429 
// up32x25 NEON MUL16 kernel: input offset of 592 combined with every zero
// index from 0 to kr - 1 (0..24), for channel counts 64, 160, 256, 352, 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3444 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3445