xref: /aosp_15_r20/external/XNNPACK/test/qc8-dwconv-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/qc8-dwconv-minmax-fp32.yaml
11 //   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ARCH_ARM
// Runs the microkernel once with channels == cr == 8 and kr == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(3).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
32 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
43 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
55 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
67 
// Sweeps channel counts 1 through 7 (all below cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
78 
// Sweeps channel counts 9 through 15 (above cr == 8, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
89 
// Sweeps channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
101 
// Sweeps channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
113 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
125 
// Sweeps channel counts 1, 8, ..., 36 crossed with step values 2 and 3,
// always with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
140 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width 5 and output_stride 43.
// The loop variable is forwarded to channels(); passing a constant 8 here would
// make every iteration run the identical configuration. output_stride (43)
// stays larger than the biggest channel count exercised (36).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template presumably needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
153 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
166 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
179 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
191 
// For each zero_index 0..2 (one per kr == 3), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
206 #endif  // XNN_ARCH_ARM
207 
208 
209 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with channels == cr == 8 and kr == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(3).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
218 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
229 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
241 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
253 
// Sweeps channel counts 1 through 7 (all below cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
264 
// Sweeps channel counts 9 through 15 (above cr == 8, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
275 
// Sweeps channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
287 
// Sweeps channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
299 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
311 
// Sweeps channel counts 1, 8, ..., 36 crossed with step values 2 and 3,
// always with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
326 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width 5 and output_stride 43.
// The loop variable is forwarded to channels(); passing a constant 8 here would
// make every iteration run the identical configuration. output_stride (43)
// stays larger than the biggest channel count exercised (36).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template presumably needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
339 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
352 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
365 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
377 
// For each zero_index 0..2 (one per kr == 3), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
392 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
393 
394 
395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with channels == cr == 8 and kr == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(3).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
404 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
415 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
427 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
439 
// Sweeps channel counts 1 through 7 (all below cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
450 
// Sweeps channel counts 9 through 15 (above cr == 8, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
461 
// Sweeps channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
473 
// Sweeps channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
485 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
497 
// Sweeps channel counts 1, 8, ..., 36 crossed with step values 2 and 3,
// always with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
512 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width 5 and output_stride 43.
// The loop variable is forwarded to channels(); passing a constant 8 here would
// make every iteration run the identical configuration. output_stride (43)
// stays larger than the biggest channel count exercised (36).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template presumably needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
525 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
538 
// Sweeps channel counts 1, 8, ..., 36 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
551 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(3).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
563 
// For each zero_index 0..2 (one per kr == 3), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(3).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
578 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
579 
580 
581 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with channels == cr == 8 and kr == 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
590 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
601 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
613 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
625 
// Sweeps channel counts 1 through 7 (all below cr == 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
636 
// Sweeps channel counts 9 through 15 (above cr == 8, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
647 
// Sweeps channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
659 
// Sweeps channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
671 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
683 
// Sweeps channel counts 1, 8, ..., 36 crossed with step values 2 through 9,
// always with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
698 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width 5 and output_stride 43.
// The loop variable is forwarded to channels(); passing a constant 8 here would
// make every iteration run the identical configuration. output_stride (43)
// stays larger than the biggest channel count exercised (36).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template presumably needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
711 
// Exercises the neon_mla8_ld64 kernel with width(3) and qmin(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
724 
// Exercises the neon_mla8_ld64 kernel with width(3) and qmax(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
737 
// Exercises the neon_mla8_ld64 kernel with input_offset(176) for channel
// counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
749 
// Exercises the neon_mla8_ld64 kernel with input_offset(176) and each
// zero_index from 0 through 8, for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
764 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
765 
766 
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the neon_mul8_ld64 kernel once with cr(8), kr(9) and channels(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
776 
// Runs the neon_mul8_ld64 kernel for channel counts 16, 40, 64, 88 and 112
// (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
787 
// Runs the neon_mul8_ld64 kernel with qmin(128) for channel counts 16, 40, 64,
// 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
799 
// Runs the neon_mul8_ld64 kernel with qmax(128) for channel counts 16, 40, 64,
// 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
811 
// Runs the neon_mul8_ld64 kernel for every channel count from 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
822 
// Runs the neon_mul8_ld64 kernel for every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
833 
// Runs the neon_mul8_ld64 kernel with qmin(128) for every channel count from
// 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
845 
// Runs the neon_mul8_ld64 kernel with qmax(128) for every channel count from
// 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
857 
// Exercises the neon_mul8_ld64 kernel with width(3) for channel counts
// 1, 8, 15, 22, 29 and 36 (cr=8, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
869 
// Exercises the neon_mul8_ld64 kernel with width(3) for every combination of
// channel count (1, 8, ..., 36) and step value (2 through 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
884 
// Exercises the neon_mul8_ld64 kernel with width(5) and output_stride(43) for
// channel counts 1, 8, ..., 36 (cr=8, kr=9).
// The loop variable now feeds .channels(); it used to be hard-coded to 8, so
// all six iterations ran the same configuration. The stride of 43 stays above
// the largest channel count (36).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be made in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
897 
// Exercises the neon_mul8_ld64 kernel with width(3) and qmin(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
910 
// Exercises the neon_mul8_ld64 kernel with width(3) and qmax(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
923 
// Exercises the neon_mul8_ld64 kernel with input_offset(176) for channel
// counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
935 
// Exercises the neon_mul8_ld64 kernel with input_offset(176) and each
// zero_index from 0 through 8, for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
950 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
951 
952 
953 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the neon_mul16 kernel once with cr(8), kr(9) and channels(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
962 
// Runs the neon_mul16 kernel for channel counts 16, 40, 64, 88 and 112
// (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
973 
// Runs the neon_mul16 kernel with qmin(128) for channel counts 16, 40, 64, 88
// and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
985 
// Runs the neon_mul16 kernel with qmax(128) for channel counts 16, 40, 64, 88
// and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
997 
// Runs the neon_mul16 kernel for every channel count from 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1008 
// Runs the neon_mul16 kernel for every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1019 
// Runs the neon_mul16 kernel with qmin(128) for every channel count from 9
// through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1031 
// Runs the neon_mul16 kernel with qmax(128) for every channel count from 9
// through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1043 
// Exercises the neon_mul16 kernel with width(3) for channel counts
// 1, 8, 15, 22, 29 and 36 (cr=8, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1055 
// Exercises the neon_mul16 kernel with width(3) for every combination of
// channel count (1, 8, ..., 36) and step value (2 through 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1070 
// Exercises the neon_mul16 kernel with width(5) and output_stride(43) for
// channel counts 1, 8, ..., 36 (cr=8, kr=9).
// The loop variable now feeds .channels(); it used to be hard-coded to 8, so
// all six iterations ran the same configuration. The stride of 43 stays above
// the largest channel count (36).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be made in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1083 
// Exercises the neon_mul16 kernel with width(3) and qmin(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1096 
// Exercises the neon_mul16 kernel with width(3) and qmax(128) for channel
// counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1109 
// Exercises the neon_mul16 kernel with input_offset(176) for channel counts
// 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1121 
// Exercises the neon_mul16 kernel with input_offset(176) and each zero_index
// from 0 through 8, for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1136 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137 
1138 
1139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the neonv8_mla8_ld64 kernel once with cr(8), kr(9) and channels(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1148 
// Runs the neonv8_mla8_ld64 kernel for channel counts 16, 40, 64, 88 and 112
// (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1159 
// Runs the neonv8_mla8_ld64 kernel with qmin(128) for channel counts 16, 40,
// 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1171 
// Runs the neonv8_mla8_ld64 kernel with qmax(128) for channel counts 16, 40,
// 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1183 
// Runs the neonv8_mla8_ld64 kernel for every channel count from 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1194 
// Runs the neonv8_mla8_ld64 kernel for every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1205 
// Runs the neonv8_mla8_ld64 kernel with qmin(128) for every channel count
// from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1217 
// Runs the neonv8_mla8_ld64 kernel with qmax(128) for every channel count
// from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1229 
// Exercises the neonv8_mla8_ld64 kernel with width(3) for channel counts
// 1, 8, 15, 22, 29 and 36 (cr=8, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1241 
// Exercises the neonv8_mla8_ld64 kernel with width(3) for every combination
// of channel count (1, 8, ..., 36) and step value (2 through 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c < 41; c += 7) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1256 
// Exercises the neonv8_mla8_ld64 kernel with width(5) and output_stride(43)
// for channel counts 1, 8, ..., 36 (cr=8, kr=9).
// The loop variable now feeds .channels(); it used to be hard-coded to 8, so
// all six iterations ran the same configuration. The stride of 43 stays above
// the largest channel count (36).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be made in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1269 
// Exercises the neonv8_mla8_ld64 kernel with width(3) and qmin(128) for
// channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1282 
// Exercises the neonv8_mla8_ld64 kernel with width(3) and qmax(128) for
// channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1295 
// Exercises the neonv8_mla8_ld64 kernel with input_offset(176) for channel
// counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1307 
// Exercises the neonv8_mla8_ld64 kernel with input_offset(176) and each
// zero_index from 0 through 8, for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 16; c <= 112; c += 24) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1322 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323 
1324 
1325 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the neonv8_mul8_ld64 kernel once with cr(8), kr(9) and channels(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1334 
// Runs the neonv8_mul8_ld64 kernel for channel counts 16, 40, 64, 88 and 112
// (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1345 
// Runs the neonv8_mul8_ld64 kernel with qmin(128) for channel counts 16, 40,
// 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1357 
// Runs the neonv8_mul8_ld64 kernel with qmax(128) for channel counts 16, 40,
// 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c <= 112; c += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1369 
// Runs the neonv8_mul8_ld64 kernel for every channel count from 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1380 
// Runs the neonv8_mul8_ld64 kernel for every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1391 
// Runs the neonv8_mul8_ld64 kernel with qmin(128) for every channel count
// from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1403 
// Runs the neonv8_mul8_ld64 kernel with qmax(128) for every channel count
// from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1415 
// Exercises the neonv8_mul8_ld64 kernel with width(3) for channel counts
// 1, 8, 15, 22, 29 and 36 (cr=8, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c < 41; c += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(c)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1427 
// width(3) runs for each combination of channel count (1, 8, ..., 36) and
// step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(c)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1442 
// width(5) runs with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to .channels(). Previously the call was
// hard-coded to .channels(8), so the loop variable was unused and the sweep
// repeated one identical configuration six times. Every swept channel count
// is below the output stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1455 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1468 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1481 
// input_offset(176) runs over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1493 
// For each zero_index 0 through 8, runs channel counts 16, 40, 64, 88 and 112
// with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1508 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509 
1510 
1511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up8x9 NEONv8 MUL16 kernel with exactly 8 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1520 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1531 
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1543 
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1555 
// Every channel count from 1 to 7 (fewer than 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1566 
// Every channel count from 9 to 15 (more than 8, fewer than 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1577 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1589 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1601 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1613 
// width(3) runs for each combination of channel count (1, 8, ..., 36) and
// step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(c)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1628 
// width(5) runs with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to .channels(). Previously the call was
// hard-coded to .channels(8), so the loop variable was unused and the sweep
// repeated one identical configuration six times. Every swept channel count
// is below the output stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1641 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1654 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1667 
// input_offset(176) runs over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(c)
        .input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1679 
// For each zero_index 0 through 8, runs channel counts 16, 40, 64, 88 and 112
// with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1694 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1695 
1696 
1697 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up8x25 NEON MLA8/LD64 kernel with exactly 8 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1706 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1717 
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1729 
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1741 
// Every channel count from 1 to 7 (fewer than 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1752 
// Every channel count from 9 to 15 (more than 8, fewer than 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1763 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1775 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1787 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1799 
// width(3) runs for each combination of channel count (1, 8, ..., 36) and
// step value 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(c)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1814 
// width(5) runs with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to .channels(). Previously the call was
// hard-coded to .channels(8), so the loop variable was unused and the sweep
// repeated one identical configuration six times. Every swept channel count
// is below the output stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1827 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1840 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1853 
// input_offset(176) runs over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1865 
// For each zero_index 0 through 24, runs channel counts 16, 40, 64, 88 and
// 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1880 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1881 
1882 
1883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up8x25 NEON MUL8/LD64 kernel with exactly 8 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1892 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1903 
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1915 
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1927 
// Every channel count from 1 to 7 (fewer than 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1938 
// Every channel count from 9 to 15 (more than 8, fewer than 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1949 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1961 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1973 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1985 
// width(3) runs for each combination of channel count (1, 8, ..., 36) and
// step value 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(c)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2000 
// width(5) runs with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to .channels(). Previously the call was
// hard-coded to .channels(8), so the loop variable was unused and the sweep
// repeated one identical configuration six times. Every swept channel count
// is below the output stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2013 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2026 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2039 
// input_offset(176) runs over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2051 
// For each zero_index 0 through 24, runs channel counts 16, 40, 64, 88 and
// 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2066 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2067 
2068 
2069 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up8x25 NEON MUL16 kernel with exactly 8 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
2078 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2089 
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2101 
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2113 
// Every channel count from 1 to 7 (fewer than 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2124 
// Every channel count from 9 to 15 (more than 8, fewer than 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2135 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2147 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2159 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2171 
// width(3) runs over channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2186 
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29
// and 36 (all smaller than the output stride).
// The previous body ignored the loop variable and always passed channels(8),
// so the identical configuration was executed six times.
// NOTE(review): this file is generated; apply the same change to
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2199 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2212 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2225 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
2237 
// For every zero_index in [0, 25) (one per kernel tap, kr = 25), runs channel
// counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2252 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2253 
2254 
2255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel with the channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2264 
// Channel counts 16, 40, 64, 88 and 112: every value is a multiple of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2275 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2287 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2299 
// Channel counts 1 through 7, i.e. every count smaller than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2310 
// Channel counts 9 through 15: more than cr = 8 but less than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2321 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2333 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2345 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2357 
// width(3) runs over channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2372 
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29
// and 36 (all smaller than the output stride).
// The previous body ignored the loop variable and always passed channels(8),
// so the identical configuration was executed six times.
// NOTE(review): this file is generated; apply the same change to
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2385 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2398 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2411 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2423 
// For every zero_index in [0, 25) (one per kernel tap, kr = 25), runs channel
// counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2438 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2439 
2440 
2441 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel with the channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2450 
// Channel counts 16, 40, 64, 88 and 112: every value is a multiple of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2461 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2473 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2485 
// Channel counts 1 through 7, i.e. every count smaller than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2496 
// Channel counts 9 through 15: more than cr = 8 but less than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2507 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2519 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2531 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2543 
// width(3) runs over channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2558 
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29
// and 36 (all smaller than the output stride).
// The previous body ignored the loop variable and always passed channels(8),
// so the identical configuration was executed six times.
// NOTE(review): this file is generated; apply the same change to
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2571 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2584 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2597 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2609 
// For every zero_index in [0, 25) (one per kernel tap, kr = 25), runs channel
// counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2624 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2625 
2626 
2627 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel with the channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2636 
// Channel counts 16, 40, 64, 88 and 112: every value is a multiple of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2647 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2659 
// Channel counts 16, 40, 64, 88 and 112 (all multiples of cr = 8), with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2671 
// Channel counts 1 through 7, i.e. every count smaller than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2682 
// Channel counts 9 through 15: more than cr = 8 but less than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2693 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2705 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2717 
// width(3) runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2729 
// width(3) runs over channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2744 
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29
// and 36 (all smaller than the output stride).
// The previous body ignored the loop variable and always passed channels(8),
// so the identical configuration was executed six times.
// NOTE(review): this file is generated; apply the same change to
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2757 
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2770 
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2783 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2795 
// For every zero_index in [0, 25) (one per kernel tap, kr = 25), runs channel
// counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2810 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2811 
2812 
2813 #if XNN_ARCH_ARM
// Runs the kernel with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(3).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2822 
// Channel counts 32, 80, 128, 176 and 224: every value is a multiple of
// cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2833 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmin(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2845 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16_with_qmax) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmax(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2857 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_lt_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 1 through 15, i.e. strictly below cr(16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2868 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 17 through 31, i.e. between cr(16) and 2 * 16.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2879 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31 with qmin(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2891 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16_with_qmax) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31 with qmax(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2903 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2915 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_step) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 and 3,
  // all with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2930 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_output_stride) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweep channel counts 1, 16, 31, 46, 61, 76 with width(5) and
  // output_stride(83); 83 is larger than every channel count visited.
  // The loop variable is forwarded to channels(): the former hard-coded
  // channels(16) left it unused and repeated one configuration six times.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2943 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2956 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmax) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2969 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, input_offset) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 (32, 80, 128, 176, 224), each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2981 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, zero) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // For every zero_index in [0, 3), sweep channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2996 #endif  // XNN_ARCH_ARM
2997 
2998 
2999 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_eq_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr(16) setting.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(3).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3008 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 -- every one a multiple of 16.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3019 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmin(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3031 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmax(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3043 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_lt_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 15, i.e. strictly below cr(16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3054 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31, i.e. between cr(16) and 2 * 16.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3065 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 through 31 with qmin(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3077 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 through 31 with qmax(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3089 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3101 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_step) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 and 3,
  // all with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3116 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_output_stride) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Sweep channel counts 1, 16, 31, 46, 61, 76 with width(5) and
  // output_stride(83); 83 is larger than every channel count visited.
  // The loop variable is forwarded to channels(): the former hard-coded
  // channels(16) left it unused and repeated one configuration six times.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3129 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3142 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3155 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, input_offset) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224), each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3167 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, zero) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // For every zero_index in [0, 3), sweep channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3183 
3184 
3185 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_eq_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr(16) setting.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(3).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3194 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 -- every one a multiple of 16.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3205 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmin(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3217 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmax(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3229 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_lt_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 15, i.e. strictly below cr(16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3240 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31, i.e. between cr(16) and 2 * 16.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3251 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 through 31 with qmin(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3263 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 17 through 31 with qmax(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3275 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3287 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_step) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 and 3,
  // all with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3302 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_output_stride) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Sweep channel counts 1, 16, 31, 46, 61, 76 with width(5) and
  // output_stride(83); 83 is larger than every channel count visited.
  // The loop variable is forwarded to channels(): the former hard-coded
  // channels(16) left it unused and repeated one configuration six times.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3315 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_qmin) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3328 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_qmax) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3341 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, input_offset) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 (32, 80, 128, 176, 224), each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3353 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, zero) {
  // ISA gate (ARM NEON); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON;
  // For every zero_index in [0, 3), sweep channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3368 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3369 
3370 
3371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_eq_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with the channel count equal to the cr(16) setting.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(3).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3380 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224 -- every one a multiple of 16.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3391 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmin(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3403 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 (32, 80, 128, 176, 224) with qmax(128) applied.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3415 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_lt_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 1 through 15, i.e. strictly below cr(16).
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3426 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 17 through 31, i.e. between cr(16) and 2 * 16.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3437 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31 with qmin(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3449 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31 with qmax(128) applied.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3461 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3473 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_step) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 and 3,
  // all with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3488 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweep channel counts 1, 16, 31, 46, 61, 76 with width(5) and
  // output_stride(83); 83 is larger than every channel count visited.
  // The loop variable is forwarded to channels(): the former hard-coded
  // channels(16) left it unused and repeated one configuration six times.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3501 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  // ISA gate (ARM NEON v8); presumably skips the test on unsupported CPUs.
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3514 
// Channel sweep 1, 16, ..., 76 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3527 
// Channel sweep 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3539 
// For each zero_index in 0..2, sweeps channels over 32, 80, 128, 176, 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3554 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3555 
3556 
3557 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16), equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3566 
// Channel sweep over multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3577 
// Channel sweep 32, 80, 128, 176, 224 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3589 
// Channel sweep 32, 80, 128, 176, 224 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3601 
// Every channel count from 1 through 15 (below the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3612 
// Every channel count from 17 through 31 (above the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3623 
// Every channel count from 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3635 
// Every channel count from 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3647 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3659 
// Channel sweep 1, 16, ..., 76 with width(3), crossed with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3674 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
// The loop variable is forwarded to channels(); a hard-coded channels(16)
// would leave it unused and repeat one configuration on every iteration.
// 83 is larger than every channel count in the sweep (max 76).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3687 
// Channel sweep 1, 16, ..., 76 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3700 
// Channel sweep 1, 16, ..., 76 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3713 
// Channel sweep 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3725 
// For each zero_index in 0..2, sweeps channels over 32, 80, 128, 176, 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3740 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3741 
3742 
3743 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16), equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3752 
// Channel sweep over multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3763 
// Channel sweep 32, 80, 128, 176, 224 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3775 
// Channel sweep 32, 80, 128, 176, 224 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3787 
// Every channel count from 1 through 15 (below the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3798 
// Every channel count from 17 through 31 (above the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3809 
// Every channel count from 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3821 
// Every channel count from 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3833 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3845 
// Channel sweep 1, 16, ..., 76 with width(3), crossed with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3860 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
// The loop variable is forwarded to channels(); a hard-coded channels(16)
// would leave it unused and repeat one configuration on every iteration.
// 83 is larger than every channel count in the sweep (max 76).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3873 
// Channel sweep 1, 16, ..., 76 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3886 
// Channel sweep 1, 16, ..., 76 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3899 
// Channel sweep 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3911 
// For each zero_index in 0..8, sweeps channels over 32, 80, 128, 176, 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3926 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3927 
3928 
3929 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16), equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3938 
// Channel sweep over multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3949 
// Channel sweep 32, 80, 128, 176, 224 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3961 
// Channel sweep 32, 80, 128, 176, 224 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3973 
// Every channel count from 1 through 15 (below the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3984 
// Every channel count from 17 through 31 (above the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3995 
// Every channel count from 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4007 
// Every channel count from 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4019 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4031 
// Channel sweep 1, 16, ..., 76 with width(3), crossed with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4046 
// Channel sweep 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
// The loop variable is forwarded to channels(); a hard-coded channels(16)
// would leave it unused and repeat one configuration on every iteration.
// 83 is larger than every channel count in the sweep (max 76).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4059 
// Channel sweep 1, 16, ..., 76 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4072 
// Channel sweep 1, 16, ..., 76 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4085 
// Channel sweep 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4097 
// For each zero_index in 0..8, sweeps channels over 32, 80, 128, 176, 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4112 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4113 
4114 
4115 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16), equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4124 
// Channel sweep over multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4135 
// Channel sweep 32, 80, 128, 176, 224 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4147 
// Channel sweep 32, 80, 128, 176, 224 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4159 
// Every channel count from 1 through 15 (below the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4170 
// Every channel count from 17 through 31 (above the cr(16) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4181 
// Every channel count from 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4193 
// Every channel count from 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4205 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4217 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76, combining
// each with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4232 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61 and 76. channels() must receive the loop variable: a fixed channels(16)
// would make all six iterations identical. The largest count (76) stays below
// the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4245 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4258 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4271 
// Runs with input_offset(304) for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4283 
// For each zero_index value 0 through 8, runs with input_offset(304) over
// channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t nc = 32; nc < 256; nc += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).input_offset(304).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4298 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4299 
4300 
4301 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
4310 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4321 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4333 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4345 
// Sweeps every channel count from 1 through 15, i.e. all values below the
// 16 passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc < 16; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4356 
// Sweeps every channel count from 17 through 31, i.e. values above the 16
// passed to cr() and below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4367 
// Sweeps channel counts 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4379 
// Sweeps channel counts 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4391 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4403 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76, combining
// each with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4418 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61 and 76. channels() must receive the loop variable: a fixed channels(16)
// would make all six iterations identical. The largest count (76) stays below
// the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4431 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4444 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4457 
// Runs with input_offset(304) for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4469 
// For each zero_index value 0 through 8, runs with input_offset(304) over
// channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t nc = 32; nc < 256; nc += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).input_offset(304).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4484 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4485 
4486 
4487 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
4496 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4507 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4519 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4531 
// Sweeps every channel count from 1 through 15, i.e. all values below the
// 16 passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc < 16; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4542 
// Sweeps every channel count from 17 through 31, i.e. values above the 16
// passed to cr() and below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4553 
// Sweeps channel counts 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4565 
// Sweeps channel counts 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4577 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4589 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76, combining
// each with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4604 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61 and 76. channels() must receive the loop variable: a fixed channels(16)
// would make all six iterations identical. The largest count (76) stays below
// the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4617 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4630 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4643 
// Runs with input_offset(304) for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
4655 
// For each zero_index value 0 through 8, runs with input_offset(304) over
// channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t nc = 32; nc < 256; nc += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).input_offset(304).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4670 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4671 
4672 
4673 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
4682 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4693 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4705 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4717 
// Sweeps every channel count from 1 through 15, i.e. all values below the
// 16 passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc < 16; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4728 
// Sweeps every channel count from 17 through 31, i.e. values above the 16
// passed to cr() and below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4739 
// Sweeps channel counts 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4751 
// Sweeps channel counts 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4763 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4775 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76, combining
// each with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).width(3).step(step_value);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4790 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61 and 76. channels() must receive the loop variable: a fixed channels(16)
// would make all six iterations identical. The largest count (76) stays below
// the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4803 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4816 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t nc = 1; nc <= 80; nc += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4829 
// Runs with input_offset(304) for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4841 
// For each zero_index value 0 through 8, runs with input_offset(304) over
// channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_pos = 0; zero_pos < 9; ++zero_pos) {
    for (uint32_t nc = 32; nc < 256; nc += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(nc).input_offset(304).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
4856 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4857 
4858 
4859 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
4868 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4879 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4891 
// Sweeps channel counts 32, 80, 128, 176 and 224 (multiples of 16) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc < 256; nc += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(nc).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
4903 
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4914 
// Covers every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4925 
// Covers channel counts 17 through 31 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4937 
// Covers channel counts 17 through 31 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4949 
// Runs width(3) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4961 
// Runs width(3) for channel counts 1, 16, ..., 76 combined with every step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4976 
// Runs width(5) with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() takes the loop variable so each iteration covers a different channel
// count; a constant there would make all six iterations identical.
// NOTE: this file is generated (see the file header), so the generator template
// needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)  // larger than every channel count in the sweep
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4989 
// Runs width(3) with qmin(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5002 
// Runs width(3) with qmax(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5015 
// Sweeps channel counts 32..224 in steps of 48 with input_offset(304) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5027 
// For each zero_index 0 through 8, sweeps channel counts 32..224 (step 48) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5042 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5043 
5044 
5045 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels == 16, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5054 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5065 
// Sweeps channel counts 32..224 in steps of 48 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5077 
// Sweeps channel counts 32..224 in steps of 48 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5089 
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5100 
// Covers every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5111 
// Covers channel counts 17 through 31 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5123 
// Covers channel counts 17 through 31 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5135 
// Runs width(3) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5147 
// Runs width(3) for channel counts 1, 16, ..., 76 combined with every step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5162 
// Runs width(5) with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() takes the loop variable so each iteration covers a different channel
// count; a constant there would make all six iterations identical.
// NOTE: this file is generated (see the file header), so the generator template
// needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)  // larger than every channel count in the sweep
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5175 
// Runs width(3) with qmin(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5188 
// Runs width(3) with qmax(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5201 
// Sweeps channel counts 32..224 in steps of 48 with input_offset(304) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5213 
// For each zero_index 0 through 8, sweeps channel counts 32..224 (step 48) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5228 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5229 
5230 
5231 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels == 16, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5240 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5251 
// Sweeps channel counts 32..224 in steps of 48 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5263 
// Sweeps channel counts 32..224 in steps of 48 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5275 
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5286 
// Covers every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5297 
// Covers channel counts 17 through 31 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5309 
// Covers channel counts 17 through 31 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5321 
// Runs width(3) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5333 
// Runs width(3) for channel counts 1, 16, ..., 76 combined with every step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5348 
// Runs width(5) with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() takes the loop variable so each iteration covers a different channel
// count; a constant there would make all six iterations identical.
// NOTE: this file is generated (see the file header), so the generator template
// needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)  // larger than every channel count in the sweep
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5361 
// Runs width(3) with qmin(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5374 
// Runs width(3) with qmax(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5387 
// Sweeps channel counts 32..224 in steps of 48 with input_offset(304) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5399 
// For each zero_index 0 through 8, sweeps channel counts 32..224 (step 48) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5414 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5415 
5416 
5417 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels == 16, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5426 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5437 
// Sweeps channel counts 32..224 in steps of 48 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5449 
// Sweeps channel counts 32..224 in steps of 48 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5461 
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5472 
// Covers every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5483 
// Covers channel counts 17 through 31 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5495 
// Covers channel counts 17 through 31 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5507 
// Runs width(3) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5519 
// Runs width(3) for channel counts 1, 16, ..., 76 combined with every step value 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5534 
// Runs width(5) with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76.
// channels() takes the loop variable so each iteration covers a different channel
// count; a constant there would make all six iterations identical.
// NOTE: this file is generated (see the file header), so the generator template
// needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)  // larger than every channel count in the sweep
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5547 
// Runs width(3) with qmin(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5560 
// Runs width(3) with qmax(128) over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5573 
// Sweeps channel counts 32..224 in steps of 48 with input_offset(304) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5585 
// Sweeps zero_index over 0..8 (outer loop) and channel counts 32, 80, 128,
// 176 and 224 (inner loop), always with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5600 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5601 
5602 
5603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to 16 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
5612 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5623 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5635 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5647 
// Every channel count from 1 through 15 (all smaller than cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5658 
// Every channel count from 17 through 31 (larger than cr = 16, below 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5669 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5681 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5693 
// Width 3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5705 
// Width 3 with step swept over 2..25 (inner loop) for channel counts
// 1, 16, 31, 46, 61 and 76 (outer loop).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5720 
// Width 5 with an output stride of 83, for channel counts 1, 16, 31, 46, 61
// and 76. The channel count follows the loop variable so that every iteration
// covers a different configuration; the largest count (76) is still smaller
// than the output stride (83).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5733 
// Width 3 with qmin set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5746 
// Width 3 with qmax set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5759 
// Input offset of 304 with channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5771 
// Sweeps zero_index over 0..24 (outer loop) and channel counts 32, 80, 128,
// 176 and 224 (inner loop), always with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5786 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5787 
5788 
5789 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to 16 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
5798 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5809 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5821 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5833 
// Every channel count from 1 through 15 (all smaller than cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5844 
// Every channel count from 17 through 31 (larger than cr = 16, below 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5855 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5867 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5879 
// Width 3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5891 
// Width 3 with step swept over 2..25 (inner loop) for channel counts
// 1, 16, 31, 46, 61 and 76 (outer loop).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5906 
// Width 5 with an output stride of 83, for channel counts 1, 16, 31, 46, 61
// and 76. The channel count follows the loop variable so that every iteration
// covers a different configuration; the largest count (76) is still smaller
// than the output stride (83).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5919 
// Width 3 with qmin set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5932 
// Width 3 with qmax set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5945 
// Input offset of 304 with channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5957 
// Sweeps zero_index over 0..24 (outer loop) and channel counts 32, 80, 128,
// 176 and 224 (inner loop), always with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5972 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5973 
5974 
5975 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to 16 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
5984 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
5995 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6007 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6019 
// Every channel count from 1 through 15 (all smaller than cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6030 
// Every channel count from 17 through 31 (larger than cr = 16, below 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6041 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6053 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6065 
// Width 3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6077 
// Width 3 with step swept over 2..25 (inner loop) for channel counts
// 1, 16, 31, 46, 61 and 76 (outer loop).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6092 
// Width 5 with an output stride of 83, for channel counts 1, 16, 31, 46, 61
// and 76. The channel count follows the loop variable so that every iteration
// covers a different configuration; the largest count (76) is still smaller
// than the output stride (83).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6105 
// Width 3 with qmin set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6118 
// Width 3 with qmax set to 128, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6131 
// Input offset of 304 with channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6143 
// Sweeps zero_index over 0..24 (outer loop) and channel counts 32, 80, 128,
// 176 and 224 (inner loop), always with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6158 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6159 
6160 
6161 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to 16 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
6170 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6181 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6193 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6205 
// Every channel count from 1 through 15 (all smaller than cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6216 
// Every channel count from 17 through 31 (larger than cr = 16, below 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6227 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6239 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6251 
// Width 3 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
6263 
// Width 3 with step swept over 2..25 (inner loop) for channel counts
// 1, 16, 31, 46, 61 and 76 (outer loop).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6278 
// Width 5 with an output stride of 83, for channel counts 1, 16, 31, 46, 61
// and 76. The channel count follows the loop variable so that every iteration
// covers a different configuration; the largest count (76) is still smaller
// than the output stride (83).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py -- mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6291 
// width(3) runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6304 
// width(3) runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6317 
// input_offset(304) over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6329 
// zero_index from 0 through 24, each over channel counts 32, 80, ..., 224,
// always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6344 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6345 
6346 
6347 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6356 
// Channel counts 32, 80, 128, 176 and 224 (start 32, stride 48, below 256).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6367 
// Channel counts 32, 80, 128, 176 and 224 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6379 
// Channel counts 32, 80, 128, 176 and 224 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6391 
// Every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6402 
// Every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6413 
// Every channel count from 17 through 31, with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6425 
// Every channel count from 17 through 31, with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6437 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6449 
// width(3) runs over channel counts 1, 16, ..., 76, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6464 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// covers a different channel count instead of repeating channels(16); the
// stride of 83 is larger than the biggest channel count visited (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6477 
// width(3) runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6490 
// width(3) runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6503 
// input_offset(304) over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6515 
// zero_index from 0 through 24, each over channel counts 32, 80, ..., 224,
// always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6530 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6531 
6532 
6533 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6542 
// Channel counts 32, 80, 128, 176 and 224 (start 32, stride 48, below 256).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6553 
// Channel counts 32, 80, 128, 176 and 224 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6565 
// Channel counts 32, 80, 128, 176 and 224 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6577 
// Every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6588 
// Every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6599 
// Every channel count from 17 through 31, with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6611 
// Every channel count from 17 through 31, with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6623 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6635 
// width(3) runs over channel counts 1, 16, ..., 76, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6650 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// covers a different channel count instead of repeating channels(16); the
// stride of 83 is larger than the biggest channel count visited (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
6663 
// width(3) runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6676 
// width(3) runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6689 
// input_offset(304) over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6701 
// zero_index from 0 through 24, each over channel counts 32, 80, ..., 224,
// always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6716 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6717 
6718 
6719 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6728 
// Channel counts 32, 80, 128, 176 and 224 (start 32, stride 48, below 256).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6739 
// Channel counts 32, 80, 128, 176 and 224 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6751 
// Channel counts 32, 80, 128, 176 and 224 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6763 
// Every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6774 
// Every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6785 
// Every channel count from 17 through 31, with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6797 
// Every channel count from 17 through 31, with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6809 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6821 
// width(3) runs over channel counts 1, 16, ..., 76, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6836 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// covers a different channel count instead of repeating channels(16); the
// stride of 83 is larger than the biggest channel count visited (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
6849 
// width(3) runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6862 
// width(3) runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6875 
// input_offset(304) over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887 
// zero_index from 0 through 24, each over channel counts 32, 80, ..., 224,
// always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6902 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6903 
6904 
6905 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: channels(16), the same value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6914 
// Channel counts 32, 80, 128, 176 and 224 (start 32, stride 48, below 256).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6925 
// Channel counts 32, 80, 128, 176 and 224 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6937 
// Channel counts 32, 80, 128, 176 and 224 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6949 
// Every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6960 
// Every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6971 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmin(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6983 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmax(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6995 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7007 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: channel counts 1, 16, 31, 46, 61, 76.
  // Inner sweep: every step value from 2 through 25, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7022 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Runs width(5) with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable used to be ignored (channels was pinned to 16), so the
  // identical configuration ran six times. The largest swept count, 76, stays
  // below the configured output stride of 83.
  // NOTE(review): the file header says this file is generated by
  // tools/generate-dwconv-test.py; the generator template needs the same
  // change or this fix is lost on regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7035 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7048 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7061 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7073 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: zero_index values 0 through 24 (one per kr slot, kr = 25).
  // Inner sweep: channel counts 32, 80, 128, 176, 224, with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7088 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7089 
7090 
7091 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with channels(16), the same value that is passed to cr().
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7100 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224 (each a multiple of the cr value 16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7111 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, every run configured with qmin(128).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7123 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, every run configured with qmax(128).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7135 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 1 through 15, i.e. all counts below the cr value 16.
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7146 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 17 through 31, i.e. strictly between cr (16) and 2 * cr (32).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7157 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmin(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7169 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmax(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7181 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7193 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: channel counts 1, 16, 31, 46, 61, 76.
  // Inner sweep: every step value from 2 through 25, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7208 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Runs width(5) with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable used to be ignored (channels was pinned to 16), so the
  // identical configuration ran six times. The largest swept count, 76, stays
  // below the configured output stride of 83.
  // NOTE(review): the file header says this file is generated by
  // tools/generate-dwconv-test.py; the generator template needs the same
  // change or this fix is lost on regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7221 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7234 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7247 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7259 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: zero_index values 0 through 24 (one per kr slot, kr = 25).
  // Inner sweep: channel counts 32, 80, 128, 176, 224, with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7274 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7275 
7276 
7277 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with channels(16), the same value that is passed to cr().
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7286 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224 (each a multiple of the cr value 16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7297 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, every run configured with qmin(128).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7309 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, every run configured with qmax(128).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7321 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 1 through 15, i.e. all counts below the cr value 16.
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7332 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every channel count from 17 through 31, i.e. strictly between cr (16) and 2 * cr (32).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7343 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmin(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7355 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 17 through 31, every run configured with qmax(128).
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7367 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7379 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: channel counts 1, 16, 31, 46, 61, 76.
  // Inner sweep: every step value from 2 through 25, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7394 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Runs width(5) with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
  // The loop variable used to be ignored (channels was pinned to 16), so the
  // identical configuration ran six times. The largest swept count, 76, stays
  // below the configured output stride of 83.
  // NOTE(review): the file header says this file is generated by
  // tools/generate-dwconv-test.py; the generator template needs the same
  // change or this fix is lost on regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7407 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7420 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7433 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7445 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Outer sweep: zero_index values 0 through 24 (one per kr slot, kr = 25).
  // Inner sweep: channel counts 32, 80, 128, 176, 224, with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7460 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7461 
7462 
7463 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with channels(24), the same value that is passed to cr().
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7472 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 48, 120, 192, 264, 336 (each a multiple of the cr value 24).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7483 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 48, 120, 192, 264, 336, every run configured with qmin(128).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7495 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 48, 120, 192, 264, 336, every run configured with qmax(128).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7507 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 23, i.e. all counts below the cr value 24.
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7518 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 25 through 47, i.e. strictly between cr (24) and 2 * cr (48).
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7529 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 25 through 47, every run configured with qmin(128).
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7541 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 25 through 47, every run configured with qmax(128).
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7553 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 24, 47, 70, 93, 116, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7565 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // Outer sweep: channel counts 1, 24, 47, 70, 93, 116.
  // Inner sweep: every step value from 2 through 9, each run with width(3).
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7580 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Runs width(5) with output_stride(127) for channel counts 1, 24, 47, 70, 93, 116.
  // The loop variable used to be ignored (channels was pinned to 24), so the
  // identical configuration ran six times. The largest swept count, 116, stays
  // below the configured output stride of 127.
  // NOTE(review): the file header says this file is generated by
  // tools/generate-dwconv-test.py; the generator template needs the same
  // change or this fix is lost on regeneration.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7593 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 24, 47, 70, 93, 116, each run with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7606 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 24, 47, 70, 93, 116, each run with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7619 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 48, 120, 192, 264, 336, each run with input_offset(464).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7631 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Outer sweep: zero_index values 0 through 8 (one per kr slot, kr = 9).
  // Inner sweep: channel counts 48, 120, 192, 264, 336, with input_offset(464).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7646 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7647 
7648 
7649 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with channels(24), the same value that is passed to cr().
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7658 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 48, 120, 192, 264, 336 (each a multiple of the cr value 24).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7669 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7681 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7693 
// Runs every channel count from 1 through 23, i.e. all counts below cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7704 
// Runs every channel count from 25 through 47, i.e. strictly between cr (24)
// and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7715 
// Runs every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7727 
// Runs every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7739 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7751 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 9 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester()
          .cr(24).kr(9).channels(c)
          .width(3)
          .step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7766 
// Width-5 runs with an output stride of 127 over channel counts
// 1, 24, 47, 70, 93, 116.
// The loop's channel count is passed to the tester so each iteration covers a
// distinct shape (a fixed .channels(24) would repeat one identical case six
// times). The stride of 127 exceeds the largest swept count (116).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7779 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7792 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7805 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of cr = 24) with an
// input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(9).channels(c)
        .input_offset(464)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
7817 
// For each zero_index in [0, 9), sweeps channel counts 48, 120, 192, 264, 336
// with an input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester()
          .cr(24).kr(9).channels(c)
          .input_offset(464)
          .zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7832 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7833 
7834 
7835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(24).kr(25).channels(24)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7844 
// Sweeps channel counts 48, 120, 192, 264, 336, all multiples of cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7855 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7867 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7879 
// Runs every channel count from 1 through 23, i.e. all counts below cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7890 
// Runs every channel count from 25 through 47, i.e. strictly between cr (24)
// and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7901 
// Runs every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7913 
// Runs every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7925 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7937 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(24).kr(25).channels(c)
          .width(3)
          .step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7952 
// Width-5 runs with an output stride of 127 over channel counts
// 1, 24, 47, 70, 93, 116.
// The loop's channel count is passed to the tester so each iteration covers a
// distinct shape (a fixed .channels(24) would repeat one identical case six
// times). The stride of 127 exceeds the largest swept count (116).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7965 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7978 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7991 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of cr = 24) with an
// input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .input_offset(464)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8003 
// For each zero_index in [0, 25), sweeps channel counts 48, 120, 192, 264, 336
// with an input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester()
          .cr(24).kr(25).channels(c)
          .input_offset(464)
          .zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8018 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8019 
8020 
8021 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester()
      .cr(24).kr(25).channels(24)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8030 
// Sweeps channel counts 48, 120, 192, 264, 336, all multiples of cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8041 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8053 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8065 
// Runs every channel count from 1 through 23, i.e. all counts below cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8076 
// Runs every channel count from 25 through 47, i.e. strictly between cr (24)
// and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8087 
// Runs every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8099 
// Runs every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8111 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8123 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(24).kr(25).channels(c)
          .width(3)
          .step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8138 
// Width-5 runs with an output stride of 127 over channel counts
// 1, 24, 47, 70, 93, 116.
// The loop's channel count is passed to the tester so each iteration covers a
// distinct shape (a fixed .channels(24) would repeat one identical case six
// times). The stride of 127 exceeds the largest swept count (116).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8151 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8164 
// Width-3 runs over channel counts 1, 24, 47, 70, 93, 116 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8177 
// Sweeps channel counts 48, 120, 192, 264, 336 (multiples of cr = 24) with an
// input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester()
        .cr(24).kr(25).channels(c)
        .input_offset(464)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8189 
// For each zero_index in [0, 25), sweeps channel counts 48, 120, 192, 264, 336
// with an input offset of 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester()
          .cr(24).kr(25).channels(c)
          .input_offset(464)
          .zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8204 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8205 
8206 
8207 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32) and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(32).kr(9).channels(32)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8216 
// Sweeps channel counts 64, 160, 256, 352, 448, all multiples of cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8227 
// Sweeps channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmin
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8239 
// Sweeps channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8251 
// Runs every channel count from 1 through 31, i.e. all counts below cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8262 
// Runs every channel count from 33 through 63, i.e. strictly between cr (32)
// and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8273 
// Runs every channel count from 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8285 
// Runs every channel count from 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8297 
// Width-3 runs over channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8309 
// Width-3 runs over channel counts 1, 32, 63, 94, 125, 156, each repeated for
// every step value from 2 through 9 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester()
          .cr(32).kr(9).channels(c)
          .width(3)
          .step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8324 
// Width-5 runs with an output stride of 163 over channel counts
// 1, 32, 63, 94, 125, 156.
// The loop's channel count is passed to the tester so each iteration covers a
// distinct shape (a fixed .channels(32) would repeat one identical case six
// times). The stride of 163 exceeds the largest swept count (156).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8337 
// Width-3 runs over channel counts 1, 32, 63, 94, 125, 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8350 
// Width-3 runs over channel counts 1, 32, 63, 94, 125, 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8363 
// Sweeps channel counts 64, 160, 256, 352, 448 (multiples of cr = 32) with an
// input offset of 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester()
        .cr(32).kr(9).channels(c)
        .input_offset(592)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8375 
// For every zero_index 0..8, runs with input_offset(592) over channel counts
// 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8390 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8391 
8392 
8393 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(32).kr(9).channels(32)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8402 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8413 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8425 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8437 
// Every channel count from 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8448 
// Every channel count from 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8459 
// Every channel count from 33 through 63, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8471 
// Every channel count from 33 through 63, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8483 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8495 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156, each combined with
// every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).width(3).step(step_size)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8510 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125, 156.
// The channel count is taken from the loop variable so the sweep really varies it;
// the stride of 163 is larger than the biggest channel count used (156).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8523 
// Width-3 runs with qmin(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8536 
// Width-3 runs with qmax(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8549 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).input_offset(592)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8561 
// For every zero_index 0..8, runs with input_offset(592) over channel counts
// 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(num_channels).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8576 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8577 
8578 
8579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8588 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8599 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8611 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8623 
// Every channel count from 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8634 
// Every channel count from 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8645 
// Every channel count from 33 through 63, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8657 
// Every channel count from 33 through 63, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8669 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8681 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156, each combined with
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).step(step_size)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8696 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125, 156.
// The channel count is taken from the loop variable so the sweep really varies it;
// the stride of 163 is larger than the biggest channel count used (156).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8709 
// Width-3 runs with qmin(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8722 
// Width-3 runs with qmax(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8735 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8747 
// For every zero_index 0..24, runs with input_offset(592) over channel counts
// 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8762 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8763 
8764 
8765 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8774 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8785 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8797 
// Channel counts 64, 160, 256, 352, 448 (each a multiple of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8809 
// Every channel count from 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8820 
// Every channel count from 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8831 
// Every channel count from 33 through 63, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8843 
// Every channel count from 33 through 63, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8855 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8867 
// Width-3 runs for channel counts 1, 32, 63, 94, 125, 156, each combined with
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).step(step_size)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8882 
// Width-5 runs with output_stride(163) for channel counts 1, 32, 63, 94, 125, 156.
// The channel count is taken from the loop variable so the sweep really varies it;
// the stride of 163 is larger than the biggest channel count used (156).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8895 
// Width-3 runs with qmin(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8908 
// Width-3 runs with qmax(128) for channel counts 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8921 
// Runs with input_offset(592) for channel counts 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8933 
// For every zero_index 0..24, runs with input_offset(592) over channel counts
// 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8948 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8949 
8950 
8951 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
8960 
// Channel counts 16, 40, 64, 88, 112 (each a multiple of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
8971 
// Channel counts 16, 40, 64, 88, 112 (each a multiple of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
8983 
// Channel counts 16, 40, 64, 88, 112 (each a multiple of 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
8995 
// Every channel count from 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
9006 
// Every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
9017 
// Every channel count from 9 through 15, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
9029 
// Every channel count from 9 through 15, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
9041 
// Width-3 runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
9053 
// Width-3 runs for channel counts 1, 8, 15, 22, 29, 36, each combined with
// step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 3; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(num_channels).width(3).step(step_size)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9068 
// Width-5 runs with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
// The channel count is taken from the loop variable so the sweep really varies it;
// the stride of 43 is larger than the biggest channel count used (36).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9081 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9094 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9107 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9119 
// Sweeps zero_index over 0, 1 and 2; for each, runs channel counts
// 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9134 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9135 
9136 
9137 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8)
    .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
9146 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9157 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9169 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9181 
// Channel counts 1 through 7, all below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9192 
// Channel counts 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9203 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9215 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9227 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9239 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, each run with
// step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9254 
// Width 5 with output_stride 43, across channel counts 1, 8, 15, 22, 29
// and 36.
// Fix: the loop variable used to be ignored (channels was hard-coded to 8),
// so the identical configuration ran six times. It is now passed to the
// tester; every value stays below the output stride of 43.
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9267 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9280 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9293 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9305 
// Sweeps zero_index over 0, 1 and 2; for each, runs channel counts
// 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9320 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9321 
9322 
9323 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
9332 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9343 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9355 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9367 
// Channel counts 1 through 7, all below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9378 
// Channel counts 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9389 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9401 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9413 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9425 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, each run with
// step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9440 
// Width 5 with output_stride 43, across channel counts 1, 8, 15, 22, 29
// and 36.
// Fix: the loop variable used to be ignored (channels was hard-coded to 8),
// so the identical configuration ran six times. It is now passed to the
// tester; every value stays below the output stride of 43.
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9453 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9466 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9479 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9491 
// Sweeps zero_index over 0 through 8; for each, runs channel counts
// 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9506 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9507 
9508 
9509 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
9518 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9529 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9541 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9553 
// Channel counts 1 through 7, all below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9564 
// Channel counts 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9575 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9587 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9599 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9611 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, each run with
// step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9626 
// Width 5 with output_stride 43, across channel counts 1, 8, 15, 22, 29
// and 36.
// Fix: the loop variable used to be ignored (channels was hard-coded to 8),
// so the identical configuration ran six times. It is now passed to the
// tester; every value stays below the output stride of 43.
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9639 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9652 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36, qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9665 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9677 
// Sweeps zero_index over 0 through 8; for each, runs channel counts
// 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9692 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9693 
9694 
9695 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
9704 
// Channel counts 16, 40, 64, 88 and 112, each a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9715 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9727 
// Channel counts 16, 40, 64, 88 and 112 (multiples of 8), qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9739 
// Channel counts 1 through 7, all below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9750 
// Channel counts 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9761 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9773 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9785 
// Width 3 with channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(ukernel, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9797 
// Width-3 run for channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 9 (channels is the outer loop, step the inner).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9812 
// Width-5 run with an explicit output_stride of 43, repeated for channel
// counts 1, 8, 15, 22, 29 and 36.
//
// Each iteration must pass the loop's own channel count to the tester: with
// channels pinned to a constant, all six iterations would exercise one
// identical configuration. The largest count (36) stays below the stride (43).
// NOTE(review): this file is marked auto-generated, so the same change
// presumably belongs in tools/generate-dwconv-test.py as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9825 
// Width-3 run for channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9838 
// Width-3 run for channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9851 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9863 
// For every zero_index 0..8 (outer loop), runs channel counts 16, 40, 64, 88
// and 112 (inner loop) with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9878 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9879 
9880 
9881 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == 8, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
9890 
// Channel counts 16, 40, 64, 88 and 112 -- each one a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9901 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9913 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9925 
// Sweeps channel counts 1 through 7, i.e. every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c <= 7; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9936 
// Sweeps channel counts 9 through 15, i.e. every value strictly between
// cr (8) and 2 * cr, with no extra tester options.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9947 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9959 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9971 
// Width-3 run for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9983 
// Width-3 run for channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 9 (channels is the outer loop, step the inner).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9998 
// Width-5 run with an explicit output_stride of 43, repeated for channel
// counts 1, 8, 15, 22, 29 and 36.
//
// Each iteration must pass the loop's own channel count to the tester: with
// channels pinned to a constant, all six iterations would exercise one
// identical configuration. The largest count (36) stays below the stride (43).
// NOTE(review): this file is marked auto-generated, so the same change
// presumably belongs in tools/generate-dwconv-test.py as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10011 
// Width-3 run for channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10024 
// Width-3 run for channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10037 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10049 
// For every zero_index 0..8 (outer loop), runs channel counts 16, 40, 64, 88
// and 112 (inner loop) with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10064 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10065 
10066 
10067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == 8, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
10076 
// Channel counts 16, 40, 64, 88 and 112 -- each one a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10087 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10099 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10111 
// Sweeps channel counts 1 through 7, i.e. every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c <= 7; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10122 
// Sweeps channel counts 9 through 15, i.e. every value strictly between
// cr (8) and 2 * cr, with no extra tester options.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10133 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10145 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10157 
// Width-3 run for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10169 
// Width-3 run for channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 9 (channels is the outer loop, step the inner).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10184 
// Width-5 run with an explicit output_stride of 43, repeated for channel
// counts 1, 8, 15, 22, 29 and 36.
//
// Each iteration must pass the loop's own channel count to the tester: with
// channels pinned to a constant, all six iterations would exercise one
// identical configuration. The largest count (36) stays below the stride (43).
// NOTE(review): this file is marked auto-generated, so the same change
// presumably belongs in tools/generate-dwconv-test.py as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10197 
// Width-3 run for channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10210 
// Width-3 run for channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10223 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10235 
// For every zero_index 0..8 (outer loop), runs channel counts 16, 40, 64, 88
// and 112 (inner loop) with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10250 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10251 
10252 
10253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == 8, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
10262 
// Channel counts 16, 40, 64, 88 and 112 -- each one a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10273 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10285 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10297 
// Sweeps channel counts 1 through 7, i.e. every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c <= 7; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10308 
// Sweeps channel counts 9 through 15, i.e. every value strictly between
// cr (8) and 2 * cr, with no extra tester options.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10319 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10331 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10343 
// Width-3 run for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10355 
// Width-3 run for channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 25 (channels is the outer loop, step the inner).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10370 
// Width-5 run with an explicit output_stride of 43, repeated for channel
// counts 1, 8, 15, 22, 29 and 36.
//
// Each iteration must pass the loop's own channel count to the tester: with
// channels pinned to a constant, all six iterations would exercise one
// identical configuration. The largest count (36) stays below the stride (43).
// NOTE(review): this file is marked auto-generated, so the same change
// presumably belongs in tools/generate-dwconv-test.py as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
10383 
// Width-3 run for channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10396 
// Width-3 run for channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10409 
// Channel counts 16, 40, 64, 88 and 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10421 
// For every zero_index 0..24 (outer loop), runs channel counts 16, 40, 64, 88
// and 112 (inner loop) with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10436 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10437 
10438 
10439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == 8, the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
10448 
// Channel counts 16, 40, 64, 88 and 112 -- each one a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10459 
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10471 
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10483 
// Sweeps channel counts 1 through 7, i.e. every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c <= 7; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10494 
// Channel counts 9 through 15, i.e. strictly between cr (8) and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10505 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10517 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10529 
// width(3) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10541 
// width(3); channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10556 
// width(5) and output_stride(43) with channel counts 1, 8, 15, 22, 29, 36.
//
// The sweep variable used to be ignored: channels was pinned to 8, so the
// identical configuration ran six times. It is now passed to the tester.
// output_stride(43) is larger than every swept channel count (max 36).
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the template used by tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
10569 
// width(3) and qmin(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10582 
// width(3) and qmax(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10595 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
10607 
// For each zero_index 0 through 24: channel counts 16, 40, ..., 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10622 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10623 
10624 
10625 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25).channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
10634 
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10645 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10657 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10669 
// Channel counts 1 through 7, i.e. every count below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10680 
// Channel counts 9 through 15, i.e. strictly between cr (8) and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10691 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10703 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10715 
// width(3) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10727 
// width(3); channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10742 
// width(5) and output_stride(43) with channel counts 1, 8, 15, 22, 29, 36.
//
// The sweep variable used to be ignored: channels was pinned to 8, so the
// identical configuration ran six times. It is now passed to the tester.
// output_stride(43) is larger than every swept channel count (max 36).
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the template used by tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10755 
// width(3) and qmin(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10768 
// width(3) and qmax(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10781 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10793 
// For each zero_index 0 through 24: channel counts 16, 40, ..., 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10808 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10809 
10810 
10811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25).channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
10820 
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10831 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10843 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10855 
// Channel counts 1 through 7, i.e. every count below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10866 
// Channel counts 9 through 15, i.e. strictly between cr (8) and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10877 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10889 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10901 
// width(3) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10913 
// width(3); channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10928 
// width(5) and output_stride(43) with channel counts 1, 8, 15, 22, 29, 36.
//
// The sweep variable used to be ignored: channels was pinned to 8, so the
// identical configuration ran six times. It is now passed to the tester.
// output_stride(43) is larger than every swept channel count (max 36).
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the template used by tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10941 
// width(3) and qmin(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10954 
// width(3) and qmax(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10967 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
10979 
// For each zero_index 0 through 24: channel counts 16, 40, ..., 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10994 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10995 
10996 
10997 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25).channels(8)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
11006 
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11017 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11029 
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11041 
// Channel counts 1 through 7, i.e. every count below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11052 
// Channel counts 9 through 15, i.e. strictly between cr (8) and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11063 
// Channel counts 9 through 15 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11075 
// Channel counts 9 through 15 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11087 
// width(3) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11099 
// width(3); channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11114 
// width(5) and output_stride(43) with channel counts 1, 8, 15, 22, 29, 36.
//
// The sweep variable used to be ignored: channels was pinned to 8, so the
// identical configuration ran six times. It is now passed to the tester.
// output_stride(43) is larger than every swept channel count (max 36).
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the template used by tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11127 
// width(3) and qmin(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11140 
// width(3) and qmax(128) with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11153 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).input_offset(176)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
11165 
// For each zero_index 0 through 24: channel counts 16, 40, ..., 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11180 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11181 
11182 
11183 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
11192 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11203 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11215 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11227 
// Covers every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11238 
// Covers every channel count from 17 through 31, i.e. just above the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11249 
// Covers every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11261 
// Covers every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11273 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11285 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11300 
// Runs width-5 cases with output_stride set to 83 for channel counts 1, 16, 31,
// 46, 61 and 76.
// The loop previously passed the constant 16 to channels(), so its loop variable
// was unused and the same configuration ran six times. The loop value is now
// forwarded, as in the other multipixel tests; the stride of 83 stays above the
// largest channel count (76).
// NOTE: this file is generated (tools/generate-dwconv-test.py); the same change
// belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
11313 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11326 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11339 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11351 
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11366 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11367 
11368 
11369 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
11378 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11389 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11401 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11413 
// Covers every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11424 
// Covers every channel count from 17 through 31, i.e. just above the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11435 
// Covers every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11447 
// Covers every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11459 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11471 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11486 
// Runs width-5 cases with output_stride set to 83 for channel counts 1, 16, 31,
// 46, 61 and 76.
// The loop previously passed the constant 16 to channels(), so its loop variable
// was unused and the same configuration ran six times. The loop value is now
// forwarded, as in the other multipixel tests; the stride of 83 stays above the
// largest channel count (76).
// NOTE: this file is generated (tools/generate-dwconv-test.py); the same change
// belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
11499 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11512 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11525 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
11537 
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11552 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11553 
11554 
11555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
11564 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11575 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11587 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11599 
// Covers every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11610 
// Covers every channel count from 17 through 31, i.e. just above the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11621 
// Covers every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11633 
// Covers every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11645 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11657 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11672 
// Runs width-5 cases with output_stride set to 83 for channel counts 1, 16, 31,
// 46, 61 and 76.
// The loop previously passed the constant 16 to channels(), so its loop variable
// was unused and the same configuration ran six times. The loop value is now
// forwarded, as in the other multipixel tests; the stride of 83 stays above the
// largest channel count (76).
// NOTE: this file is generated (tools/generate-dwconv-test.py); the same change
// belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11685 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11698 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11711 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11723 
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11739 
11740 
11741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
11750 
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11761 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11773 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11785 
// Covers every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11796 
// Covers every channel count from 17 through 31, i.e. just above the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11807 
// Covers every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11819 
// Covers every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11831 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11843 
// Runs width-3 cases for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11858 
// Runs width-5 cases with output_stride set to 83 for channel counts 1, 16, 31,
// 46, 61 and 76.
// The loop previously passed the constant 16 to channels(), so its loop variable
// was unused and the same configuration ran six times. The loop value is now
// forwarded, as in the other multipixel tests; the stride of 83 stays above the
// largest channel count (76).
// NOTE: this file is generated (tools/generate-dwconv-test.py); the same change
// belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11871 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11884 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11897 
// Channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11909 
// Every zero_index from 0 through 8 (kr is 9), crossed with channel counts
// 32, 80, 128, 176 and 224; input_offset is 304 throughout.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t ch = 32; ch < 256; ch += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(ch).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11924 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11925 
11926 
11927 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equals the cr value of 16, with kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11936 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of the cr value 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11947 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11959 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11971 
// Every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11982 
// Every channel count from 17 through 31, i.e. between one and two times cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11993 
// Every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12005 
// Every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12017 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12029 
// Channel counts 1, 16, 31, 46, 61 and 76 crossed with step values 2 through 9,
// each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    for (size_t st = 2; st <= 9; ++st) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(ch).width(3).step(st);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12044 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width 5 and an
// output_stride of 83, which is larger than every channel count visited.
// The loop variable is passed to channels() so that each iteration exercises a
// different channel count instead of repeating one fixed configuration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12057 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12070 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12083 
// Channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(ch).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12095 
// Every zero_index from 0 through 8 (kr is 9), crossed with channel counts
// 32, 80, 128, 176 and 224; input_offset is 304 throughout.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t ch = 32; ch < 256; ch += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(ch).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12110 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12111 
12112 
12113 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equals the cr value of 16, with kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
12122 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of the cr value 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12133 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12145 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12157 
// Every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12168 
// Every channel count from 17 through 31, i.e. between one and two times cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12179 
// Every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12191 
// Every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12203 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12215 
// Channel counts 1, 16, 31, 46, 61 and 76 crossed with step values 2 through 25,
// each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    for (size_t st = 2; st <= 25; ++st) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(ch).width(3).step(st);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12230 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width 5 and an
// output_stride of 83, which is larger than every channel count visited.
// The loop variable is passed to channels() so that each iteration exercises a
// different channel count instead of repeating one fixed configuration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
12243 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12256 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12269 
// Channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12281 
// Every zero_index from 0 through 24 (kr is 25), crossed with channel counts
// 32, 80, 128, 176 and 224; input_offset is 304 throughout.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 32; ch < 256; ch += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(ch).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12297 
12298 
12299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equals the cr value of 16, with kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
12308 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of the cr value 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12319 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12331 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12343 
// Every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12354 
// Every channel count from 17 through 31, i.e. between one and two times cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12365 
// Every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12377 
// Every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12389 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12401 
// Channel counts 1, 16, 31, 46, 61 and 76 crossed with step values 2 through 25,
// each run with width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    for (size_t st = 2; st <= 25; ++st) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(ch).width(3).step(st);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12416 
// Channel counts 1, 16, 31, 46, 61 and 76, each run with width 5 and an
// output_stride of 83, which is larger than every channel count visited.
// The loop variable is passed to channels() so that each iteration exercises a
// different channel count instead of repeating one fixed configuration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
12429 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12442 
// Channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12455 
// Channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
12467 
// Every zero_index from 0 through 24 (kr is 25), crossed with channel counts
// 32, 80, 128, 176 and 224; input_offset is 304 throughout.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 32; ch < 256; ch += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(ch).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12482 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12483 
12484 
12485 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equals the cr value of 16, with kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12494 
// Channel counts 32, 80, 128, 176 and 224, each a multiple of the cr value 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12505 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12517 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12529 
// Every channel count from 1 through 15, i.e. below the cr value of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12540 
// Every channel count from 17 through 31, i.e. between one and two times cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12551 
// Every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12563 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12575 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12587 
// For channels = 1, 16, 31, 46, 61, 76 at width 3, runs every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12602 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 5 with output_stride(83).
// The loop variable is now forwarded to channels(); the previous fixed
// channels(16) made every iteration run the same configuration.
// NOTE(review): assumes the tester only needs output_stride >= channels
// (76 < 83 holds). This file is generated, so mirror the change in
// tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12615 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12628 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12641 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12653 
// For each zero_index 0 through 24, sweeps channels = 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12668 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12669 
12670 
12671 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels = 16 with cr = 16, kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12680 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12691 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12703 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12715 
// Runs every channel count from 1 through 15 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12726 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12737 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12749 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12761 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12773 
// For channels = 1, 16, 31, 46, 61, 76 at width 3, runs every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12788 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 5 with output_stride(83).
// The loop variable is now forwarded to channels(); the previous fixed
// channels(16) made every iteration run the same configuration.
// NOTE(review): assumes the tester only needs output_stride >= channels
// (76 < 83 holds). This file is generated, so mirror the change in
// tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12801 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12814 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12827 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12839 
// For each zero_index 0 through 24, sweeps channels = 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12854 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12855 
12856 
12857 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels = 16 with cr = 16, kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12866 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12877 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12889 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12901 
// Runs every channel count from 1 through 15 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12912 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12923 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12935 
// Runs every channel count from 17 through 31 (cr = 16, kr = 25) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12947 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 (cr = 16, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12959 
// For channels = 1, 16, 31, 46, 61, 76 at width 3, runs every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12974 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 5 with output_stride(83).
// The loop variable is now forwarded to channels(); the previous fixed
// channels(16) made every iteration run the same configuration.
// NOTE(review): assumes the tester only needs output_stride >= channels
// (76 < 83 holds). This file is generated, so mirror the change in
// tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12987 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13000 
// Sweeps channels = 1, 16, 31, 46, 61, 76 at width 3 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13013 
// Sweeps channels = 32, 80, 128, 176, 224 (cr = 16, kr = 25) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13025 
// For each zero_index 0 through 24, sweeps channels = 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13041 
13042 
13043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels = 24 with cr = 24, kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
13052 
// Sweeps channels = 48, 120, 192, 264, 336 (cr = 24, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13063 
// Sweeps channels = 48, 120, 192, 264, 336 (cr = 24, kr = 9) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13075 
// Sweeps channels = 48, 120, 192, 264, 336 (cr = 24, kr = 9) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13087 
// Runs every channel count from 1 through 23 (cr = 24, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13098 
// Runs every channel count from 25 through 47 (cr = 24, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13109 
// Runs every channel count from 25 through 47 (cr = 24, kr = 9) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13121 
// Runs every channel count from 25 through 47 (cr = 24, kr = 9) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13133 
// Sweeps channels = 1, 24, 47, 70, 93, 116 at width 3 (cr = 24, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13145 
// For channels = 1, 24, 47, 70, 93, 116 at width 3, runs every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13160 
// Sweeps channels = 1, 24, 47, 70, 93, 116 at width 5 with output_stride(127).
// The loop variable is now forwarded to channels(); the previous fixed
// channels(24) made every iteration run the same configuration.
// NOTE(review): assumes the tester only needs output_stride >= channels
// (116 < 127 holds). This file is generated, so mirror the change in
// tools/generate-dwconv-test.py.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13173 
// Sweeps channels = 1, 24, 47, 70, 93, 116 at width 3 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13186 
// Sweeps channels = 1, 24, 47, 70, 93, 116 at width 3 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13199 
// Sweeps channels = 48, 120, 192, 264, 336 (cr = 24, kr = 9) with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13211 
// For each zero_index 0 through 8, sweeps channels = 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13226 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13227 
13228 
13229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels = 24 with cr = 24, kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13238 
// Sweeps channels = 48, 120, 192, 264, 336 (cr = 24, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13249 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13261 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13273 
// Every channel count from 1 through 23 (below the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13284 
// Every channel count from 25 through 47 (above the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13295 
// Every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13307 
// Every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13319 
// Width 3 with channel counts 1, 24, 47, ..., 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13331 
// Width 3 with channel counts 1, 24, 47, ..., 116, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
13346 
// Width 5 with output_stride 127, swept over channel counts 1, 24, 47, ...,
// 116. The loop variable is passed to channels() so each iteration covers a
// different channel count instead of repeating the fixed 24-channel case;
// every count in the sweep is below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13359 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13372 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13385 
// Channel counts 48, 120, ..., 336 with input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).input_offset(464);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13397 
// For each zero_index in [0, 9): channel counts 48, 120, ..., 336 with
// input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
13412 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13413 
13414 
13415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to the cr value (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
13424 
// Channel counts 48, 120, ..., 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13435 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13447 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13459 
// Every channel count from 1 through 23 (below the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13470 
// Every channel count from 25 through 47 (above the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13481 
// Every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13493 
// Every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13505 
// Width 3 with channel counts 1, 24, 47, ..., 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13517 
// Width 3 with channel counts 1, 24, 47, ..., 116, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
13532 
// Width 5 with output_stride 127, swept over channel counts 1, 24, 47, ...,
// 116. The loop variable is passed to channels() so each iteration covers a
// different channel count instead of repeating the fixed 24-channel case;
// every count in the sweep is below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13545 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13558 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13571 
// Channel counts 48, 120, ..., 336 with input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).input_offset(464);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13583 
// For each zero_index in [0, 9): channel counts 48, 120, ..., 336 with
// input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
13598 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13599 
13600 
13601 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to the cr value (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
13610 
// Channel counts 48, 120, ..., 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13621 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13633 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13645 
// Every channel count from 1 through 23 (below the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13656 
// Every channel count from 25 through 47 (above the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13667 
// Every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13679 
// Every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13691 
// Width 3 with channel counts 1, 24, 47, ..., 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13703 
// Width 3 with channel counts 1, 24, 47, ..., 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
13718 
// Width 5 with output_stride 127, swept over channel counts 1, 24, 47, ...,
// 116. The loop variable is passed to channels() so each iteration covers a
// different channel count instead of repeating the fixed 24-channel case;
// every count in the sweep is below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13731 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13744 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13757 
// Channel counts 48, 120, ..., 336 with input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13769 
// For each zero_index in [0, 25): channel counts 48, 120, ..., 336 with
// input_offset set to 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).input_offset(464).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
13784 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13785 
13786 
13787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to the cr value (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
13796 
// Channel counts 48, 120, ..., 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13807 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13819 
// Channel counts 48, 120, ..., 336 (all multiples of 24) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13831 
// Every channel count from 1 through 23 (below the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13842 
// Every channel count from 25 through 47 (above the cr value of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13853 
// Every channel count from 25 through 47 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13865 
// Every channel count from 25 through 47 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13877 
// Width 3 with channel counts 1, 24, 47, ..., 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13889 
// Width 3 with channel counts 1, 24, 47, ..., 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
13904 
// Width 5 with output_stride 127, swept over channel counts 1, 24, 47, ...,
// 116. The loop variable is passed to channels() so each iteration covers a
// different channel count instead of repeating the fixed 24-channel case;
// every count in the sweep is below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13917 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13930 
// Width 3 with channel counts 1, 24, 47, ..., 116 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13943 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, input_offset) {
  // input_offset(464) with channel counts 48, 120, 192, 264, 336 (all multiples of 24).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13955 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, zero) {
  // For every zero_index from 0 through 24, runs channel counts 48, 120, 192, 264, 336
  // with input_offset(464).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24);
      tester.kr(25);
      tester.channels(num_channels);
      tester.input_offset(464);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13970 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13971 
13972 
13973 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_eq_24) {
  // Single run with the channel count fixed at 24, the same value passed to cr().
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(24);
  tester.kr(25);
  tester.channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13982 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24) {
  // Channel counts 48, 120, 192, 264, 336 (all multiples of 24).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13993 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
  // Channel counts 48, 120, 192, 264, 336 (all multiples of 24) with qmin(128).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14005 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
  // Channel counts 48, 120, 192, 264, 336 (all multiples of 24) with qmax(128).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14017 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_lt_24) {
  // Every channel count from 1 through 23, i.e. all counts below the cr() value of 24.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14028 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24) {
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14039 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
  // Every channel count from 25 through 47 with qmin(128).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14051 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
  // Every channel count from 25 through 47 with qmax(128).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14063 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel) {
  // width(3), swept over channel counts 1, 24, 47, 70, 93, 116.
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14075 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_step) {
  // width(3); for each channel count in 1, 24, 47, 70, 93, 116 the step value runs
  // from 2 through 25.
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(24);
      tester.kr(25);
      tester.channels(num_channels);
      tester.width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14090 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
  // width(5) with output_stride(127), swept over channel counts 1, 24, 47, 70, 93, 116.
  // channels() receives the loop variable so that each iteration exercises a different
  // channel count; every swept count is below the output stride of 127.
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14103 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
  // width(3) and qmin(128), swept over channel counts 1, 24, 47, 70, 93, 116.
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14116 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
  // width(3) and qmax(128), swept over channel counts 1, 24, 47, 70, 93, 116.
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14129 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, input_offset) {
  // input_offset(464) with channel counts 48, 120, 192, 264, 336 (all multiples of 24).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24);
    tester.kr(25);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14141 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, zero) {
  // For every zero_index from 0 through 24, runs channel counts 48, 120, 192, 264, 336
  // with input_offset(464).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24);
      tester.kr(25);
      tester.channels(num_channels);
      tester.input_offset(464);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14156 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14157 
14158 
14159 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
  // Single run with the channel count fixed at 8, the same value passed to cr().
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8);
  tester.kr(9);
  tester.channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
14168 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14179 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14191 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14203 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
  // Every channel count from 1 through 7, i.e. all counts below the cr() value of 8.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14214 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
  // Every channel count from 9 through 15, i.e. strictly between 8 and 16.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14225 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
  // Every channel count from 9 through 15 with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14237 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
  // Every channel count from 9 through 15 with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14249 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
  // width(3), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14261 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
  // width(3); for each channel count in 1, 8, 15, 22, 29, 36 the step value runs
  // from 2 through 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8);
      tester.kr(9);
      tester.channels(num_channels);
      tester.width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14276 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
  // width(5) with output_stride(43), swept over channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so that each iteration exercises a different
  // channel count; every swept count is below the output stride of 43.
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14289 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
  // width(3) and qmin(128), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14302 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
  // width(3) and qmax(128), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14315 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112 (all multiples of 8).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14327 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
  // For every zero_index from 0 through 8, runs channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8);
      tester.kr(9);
      tester.channels(num_channels);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14342 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14343 
14344 
14345 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_eq_8) {
  // Single run with the channel count fixed at 8, the same value passed to cr().
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8);
  tester.kr(9);
  tester.channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
14354 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14365 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14377 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14389 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_lt_8) {
  // Every channel count from 1 through 7, i.e. all counts below the cr() value of 8.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14400 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8) {
  // Every channel count from 9 through 15, i.e. strictly between 8 and 16.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14411 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  // Every channel count from 9 through 15 with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14423 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  // Every channel count from 9 through 15 with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14435 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel) {
  // width(3), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14447 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_step) {
  // width(3); for each channel count in 1, 8, 15, 22, 29, 36 the step value runs
  // from 2 through 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8);
      tester.kr(9);
      tester.channels(num_channels);
      tester.width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14462 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  // width(5) with output_stride(43), swept over channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so that each iteration exercises a different
  // channel count; every swept count is below the output stride of 43.
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14475 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  // width(3) and qmin(128), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14488 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  // width(3) and qmax(128), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14501 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112 (all multiples of 8).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14513 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, zero) {
  // For every zero_index from 0 through 8, runs channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8);
      tester.kr(9);
      tester.channels(num_channels);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14528 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14529 
14530 
14531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
  // Single run with the channel count fixed at 8, the same value passed to cr().
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8);
  tester.kr(9);
  tester.channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
14540 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14551 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14563 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (all multiples of 8) with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14575 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
  // Every channel count from 1 through 7, i.e. all counts below the cr() value of 8.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14586 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
  // Every channel count from 9 through 15, i.e. strictly between 8 and 16.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14597 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
  // Every channel count from 9 through 15 with qmin(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14609 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
  // Every channel count from 9 through 15 with qmax(128).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14621 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
  // width(3), swept over channel counts 1, 8, 15, 22, 29, 36.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8);
    tester.kr(9);
    tester.channels(num_channels);
    tester.width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14633 
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
  // width(3); for each channel count in 1, 8, 15, 22, 29, 36 the step value runs
  // from 2 through 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8);
      tester.kr(9);
      tester.channels(num_channels);
      tester.width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14648 
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The previous body passed the constant channels(8) on every iteration, so the
// loop variable was unused and one configuration simply ran six times. The
// loop variable is now forwarded to channels(); the largest count (36) stays
// below the output stride of 43.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14661 
// width(3) runs with qmin(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14674 
// width(3) runs with qmax(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14687 
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14699 
// For every zero_index from 0 through 8 (one per kr(9) position), runs
// channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14714 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14715 
14716 
14717 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single run with channels(8), equal to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
14726 
// Channel counts 16, 40, 64, 88 and 112: each one is a multiple of cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14737 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14749 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14761 
// Channel counts 1 through 7, all smaller than cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14772 
// Channel counts 9 through 15, i.e. above cr(8) but below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14783 
// Channel counts 9 through 15 (above cr(8), below 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14795 
// Channel counts 9 through 15 (above cr(8), below 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14807 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14819 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14834 
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The previous body passed the constant channels(8) on every iteration, so the
// loop variable was unused and one configuration simply ran six times. The
// loop variable is now forwarded to channels(); the largest count (36) stays
// below the output stride of 43.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14847 
// width(3) runs with qmin(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14860 
// width(3) runs with qmax(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14873 
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
14885 
// For every zero_index from 0 through 8 (one per kr(9) position), runs
// channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14900 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14901 
14902 
14903 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single run with channels(8), equal to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
14912 
// Channel counts 16, 40, 64, 88 and 112: each one is a multiple of cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14923 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14935 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14947 
// Channel counts 1 through 7, all smaller than cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14958 
// Channel counts 9 through 15, i.e. above cr(8) but below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14969 
// Channel counts 9 through 15 (above cr(8), below 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14981 
// Channel counts 9 through 15 (above cr(8), below 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14993 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15005 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15020 
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The previous body passed the constant channels(8) on every iteration, so the
// loop variable was unused and one configuration simply ran six times. The
// loop variable is now forwarded to channels(); the largest count (36) stays
// below the output stride of 43.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15033 
// width(3) runs with qmin(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15046 
// width(3) runs with qmax(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15059 
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15071 
// For every zero_index from 0 through 8 (one per kr(9) position), runs
// channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15086 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15087 
15088 
15089 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single run with channels(8), equal to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
15098 
// Channel counts 16, 40, 64, 88 and 112: each one is a multiple of cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15109 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15121 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15133 
// Channel counts 1 through 7, all smaller than cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15144 
// Channel counts 9 through 15, i.e. above cr(8) but below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15155 
// Channel counts 9 through 15 (above cr(8), below 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15167 
// Channel counts 9 through 15 (above cr(8), below 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15179 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15191 
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15206 
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The previous body passed the constant channels(8) on every iteration, so the
// loop variable was unused and one configuration simply ran six times. The
// loop variable is now forwarded to channels(); the largest count (36) stays
// below the output stride of 43.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15219 
// width(3) runs with qmin(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15232 
// width(3) runs with qmax(128) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15245 
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15257 
// For every zero_index from 0 through 8 (one per kr(9) position), runs
// channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15272 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15273 
15274 
15275 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case for the kr(25) kernel: one run with channels(8), equal to
// cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
15284 
// Channel counts 16, 40, 64, 88 and 112: each one is a multiple of cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15295 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15307 
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr(8)) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15319 
// Channel counts 1 through 7, all smaller than cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15330 
// Channel counts 9 through 15, i.e. above cr(8) but below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15341 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15353 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15365 
// Runs the tester with cr = 8, kr = 25 and width = 3 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15377 
// Runs the tester with cr = 8, kr = 25 and width = 3 for every combination of
// channels = 1, 8, 15, 22, 29, 36 and step = 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15392 
// Runs the tester with cr = 8, kr = 25, width = 5 and output_stride = 43 for
// channels = 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); previously the body passed a
// constant 8, so every iteration repeated the same configuration. The stride
// of 43 stays above the largest channel count tested (36).
// NOTE(review): presumably the tester requires output_stride >= channels -
// confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15405 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmin = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15418 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmax = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15431 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for
// channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15443 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for every
// combination of zero_index = 0..24 and channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15459 
15460 
15461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with cr = 8, kr = 25 and channels = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
15470 
// Runs the tester with cr = 8, kr = 25 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15481 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15493 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15505 
// Runs the tester with cr = 8, kr = 25 for every channel count below 8 (1..7).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15516 
// Runs the tester with cr = 8, kr = 25 for channel counts 9..15
// (strictly between 8 and 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15527 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15539 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15551 
// Runs the tester with cr = 8, kr = 25 and width = 3 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15563 
// Runs the tester with cr = 8, kr = 25 and width = 3 for every combination of
// channels = 1, 8, 15, 22, 29, 36 and step = 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15578 
// Runs the tester with cr = 8, kr = 25, width = 5 and output_stride = 43 for
// channels = 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); previously the body passed a
// constant 8, so every iteration repeated the same configuration. The stride
// of 43 stays above the largest channel count tested (36).
// NOTE(review): presumably the tester requires output_stride >= channels -
// confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15591 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmin = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15604 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmax = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15617 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for
// channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15629 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for every
// combination of zero_index = 0..24 and channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15644 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15645 
15646 
15647 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with cr = 8, kr = 25 and channels = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
15656 
// Runs the tester with cr = 8, kr = 25 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15667 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15679 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15691 
// Runs the tester with cr = 8, kr = 25 for every channel count below 8 (1..7).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15702 
// Runs the tester with cr = 8, kr = 25 for channel counts 9..15
// (strictly between 8 and 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15713 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15725 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15737 
// Runs the tester with cr = 8, kr = 25 and width = 3 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15749 
// Runs the tester with cr = 8, kr = 25 and width = 3 for every combination of
// channels = 1, 8, 15, 22, 29, 36 and step = 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15764 
// Runs the tester with cr = 8, kr = 25, width = 5 and output_stride = 43 for
// channels = 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); previously the body passed a
// constant 8, so every iteration repeated the same configuration. The stride
// of 43 stays above the largest channel count tested (36).
// NOTE(review): presumably the tester requires output_stride >= channels -
// confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15777 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmin = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15790 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmax = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15803 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for
// channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
15815 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for every
// combination of zero_index = 0..24 and channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15830 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15831 
15832 
15833 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with cr = 8, kr = 25 and channels = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
15842 
// Runs the tester with cr = 8, kr = 25 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15853 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15865 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15877 
// Runs the tester with cr = 8, kr = 25 for every channel count below 8 (1..7).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15888 
// Runs the tester with cr = 8, kr = 25 for channel counts 9..15
// (strictly between 8 and 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15899 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15911 
// Runs the tester with cr = 8, kr = 25 and qmax = 128 for channel counts 9..15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15923 
// Runs the tester with cr = 8, kr = 25 and width = 3 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15935 
// Runs the tester with cr = 8, kr = 25 and width = 3 for every combination of
// channels = 1, 8, 15, 22, 29, 36 and step = 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15950 
// Runs the tester with cr = 8, kr = 25, width = 5 and output_stride = 43 for
// channels = 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); previously the body passed a
// constant 8, so every iteration repeated the same configuration. The stride
// of 43 stays above the largest channel count tested (36).
// NOTE(review): presumably the tester requires output_stride >= channels -
// confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15963 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmin = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15976 
// Runs the tester with cr = 8, kr = 25, width = 3 and qmax = 128 for
// channels = 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
15989 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for
// channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16001 
// Runs the tester with cr = 8, kr = 25 and input_offset = 176 for every
// combination of zero_index = 0..24 and channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16016 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16017 
16018 
16019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with cr = 8, kr = 25 and channels = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16028 
// Runs the tester with cr = 8, kr = 25 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16039 
// Runs the tester with cr = 8, kr = 25 and qmin = 128 for
// channels = 16, 40, 64, 88, 112 (multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16051 
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16063 
// Covers every channel count below cr (8): 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16074 
// Covers channel counts strictly between cr (8) and 2 * cr: 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16085 
// Covers channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16097 
// Covers channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16109 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16121 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16136 
// Width-5 runs with an explicit output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable was previously unused: the body passed a fixed
// .channels(8), so all six iterations exercised the same configuration. The
// sibling multipixel tests forward the loop variable, and the stride (43) is
// larger than every channel count in the sweep, so it is forwarded here too.
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the generator template as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16149 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16162 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16175 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16187 
// For each zero_index in [0, 25), sweeps channel counts 16, 40, 64, 88, 112
// with input_offset fixed at 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16202 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16203 
16204 
16205 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (8); kr is 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16214 
// Sweeps channel counts 16, 40, 64, 88, 112 -- all multiples of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16225 
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16237 
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax
// set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16249 
// Covers every channel count below cr (8): 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16260 
// Covers channel counts strictly between cr (8) and 2 * cr: 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16271 
// Covers channel counts 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16283 
// Covers channel counts 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16295 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16307 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16322 
// Width-5 runs with an explicit output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable was previously unused: the body passed a fixed
// .channels(8), so all six iterations exercised the same configuration. The
// sibling multipixel tests forward the loop variable, and the stride (43) is
// larger than every channel count in the sweep, so it is forwarded here too.
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the generator template as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16335 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16348 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16361 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16373 
// For each zero_index in [0, 25), sweeps channel counts 16, 40, 64, 88, 112
// with input_offset fixed at 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16388 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16389 
16390 
16391 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (16); kr is 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16400 
// Sweeps channel counts 32, 80, 128, 176, 224 -- all multiples of cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16411 
// Sweeps channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16423 
// Sweeps channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16435 
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16446 
// Covers channel counts strictly between cr (16) and 2 * cr: 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16457 
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16469 
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16481 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16493 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76, each repeated for
// step values 2 and 3 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16508 
// Width-5 runs with an explicit output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable was previously unused: the body passed a fixed
// .channels(16), so all six iterations exercised the same configuration. The
// sibling multipixel tests forward the loop variable, and the stride (83) is
// larger than every channel count in the sweep, so it is forwarded here too.
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the generator template as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16521 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16534 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16547 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16559 
// For each zero_index in [0, 3), sweeps channel counts 32, 80, 128, 176, 224
// with input_offset fixed at 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16574 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16575 
16576 
16577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (16); kr is 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
16586 
// Sweeps channel counts 32, 80, 128, 176, 224 -- all multiples of cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16597 
// Sweeps channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16609 
// Sweeps channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16621 
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16632 
// Covers channel counts strictly between cr (16) and 2 * cr: 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16643 
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16655 
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16667 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16679 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76, each repeated for
// step values 2 and 3 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(3).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16694 
// Width-5 runs with an explicit output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable was previously unused: the body passed a fixed
// .channels(16), so all six iterations exercised the same configuration. The
// sibling multipixel tests forward the loop variable, and the stride (83) is
// larger than every channel count in the sweep, so it is forwarded here too.
// NOTE(review): this file is auto-generated; the same change presumably
// belongs in the generator template as well -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16707 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16720 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16733 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(3).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16745 
// For each zero_index in [0, 3) and channel counts 32, 80, 128, 176, 224,
// runs with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(channel_count).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16760 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16761 
16762 
16763 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(3).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
16772 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16783 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16795 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16807 
// Every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 1; channel_count < 16; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16818 
// Every channel count from 17 through 31 (above cr = 16, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16829 
// Channel counts 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16841 
// Channel counts 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16853 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16865 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76, each repeated
// with step set to 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 3; step_size++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(channel_count).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
16880 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61, 76, i.e. below, equal to, and above cr (16). The stride 83 is larger than
// every channel count tried.
// The loop variable now drives channels(); with channels pinned to 16 the loop
// repeated one identical configuration six times.
// NOTE(review): this file is generated (see the file header); the generator
// template needs the same change or a regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16893 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16906 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16919 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(channel_count).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16931 
// For each zero_index in [0, 3) and channel counts 32, 80, 128, 176, 224,
// runs with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(channel_count).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
16946 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16947 
16948 
16949 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
16958 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16969 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16981 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16993 
// Every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 1; channel_count < 16; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17004 
// Every channel count from 17 through 31 (above cr = 16, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17015 
// Channel counts 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17027 
// Channel counts 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17039 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17051 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76, each repeated
// with step set to every value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 9; step_size++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(channel_count).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
17066 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61, 76, i.e. below, equal to, and above cr (16). The stride 83 is larger than
// every channel count tried.
// The loop variable now drives channels(); with channels pinned to 16 the loop
// repeated one identical configuration six times.
// NOTE(review): this file is generated (see the file header); the generator
// template needs the same change or a regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17079 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17092 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17105 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17117 
// For each zero_index in [0, 9) and channel counts 32, 80, 128, 176, 224,
// runs with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
17132 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17133 
17134 
17135 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
17144 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17155 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17167 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17179 
// Every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 1; channel_count < 16; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17190 
// Every channel count from 17 through 31 (above cr = 16, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17201 
// Channel counts 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17213 
// Channel counts 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17225 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17237 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76, each repeated
// with step set to every value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 9; step_size++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(channel_count).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
17252 
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46,
// 61, 76, i.e. below, equal to, and above cr (16). The stride 83 is larger than
// every channel count tried.
// The loop variable now drives channels(); with channels pinned to 16 the loop
// repeated one identical configuration six times.
// NOTE(review): this file is generated (see the file header); the generator
// template needs the same change or a regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17265 
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17278 
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17291 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17303 
// For each zero_index in [0, 9) and channel counts 32, 80, 128, 176, 224,
// runs with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
17318 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17319 
17320 
17321 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
17330 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17341 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17353 
// Multiples-of-16 channel sweep (32, 80, 128, 176, 224) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17365 
// Every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 1; channel_count < 16; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17376 
// Every channel count from 17 through 31 (above cr = 16, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17387 
// Channel counts 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17399 
// Channel counts 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; channel_count++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17411 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channel_count).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17423 
// Runs with width(3) for channel counts 1, 16, 31, 46, 61, 76, each repeated
// with step set to every value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 9; step_size++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(channel_count).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
17438 
// Width-5 runs with an output stride of 83 over channel counts 1, 16, 31, 46,
// 61 and 76.
//
// The loop's channel count is forwarded to channels(); passing a fixed 16 here
// would leave the loop variable unused and repeat one configuration six times.
// The largest count visited (76) stays below the output stride (83).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py, so the
// generator template needs the same change to keep it from being overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17451 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17464 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17477 
// Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17489 
// For each zero index 0 through 8, runs channel counts 32, 80, 128, 176 and
// 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17504 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17505 
17506 
17507 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channel count 16, equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17516 
// Channel counts that are multiples of 16: 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17527 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17539 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17551 
// Sweeps every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 1; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17562 
// Sweeps every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17573 
// Sweeps every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17585 
// Sweeps every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17597 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17609 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).step(step_value).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17624 
// Width-5 runs with an output stride of 83 over channel counts 1, 16, 31, 46,
// 61 and 76.
//
// The loop's channel count is forwarded to channels(); passing a fixed 16 here
// would leave the loop variable unused and repeat one configuration six times.
// The largest count visited (76) stays below the output stride (83).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py, so the
// generator template needs the same change to keep it from being overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
17637 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17650 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17663 
// Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17675 
// For each zero index 0 through 8, runs channel counts 32, 80, 128, 176 and
// 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17690 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17691 
17692 
17693 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channel count 16, equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17702 
// Channel counts that are multiples of 16: 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17713 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17725 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17737 
// Sweeps every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 1; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17748 
// Sweeps every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17759 
// Sweeps every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17771 
// Sweeps every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17783 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17795 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).step(step_value).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17810 
// Width-5 runs with an output stride of 83 over channel counts 1, 16, 31, 46,
// 61 and 76.
//
// The loop's channel count is forwarded to channels(); passing a fixed 16 here
// would leave the loop variable unused and repeat one configuration six times.
// The largest count visited (76) stays below the output stride (83).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py, so the
// generator template needs the same change to keep it from being overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
17823 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17836 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17849 
// Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17861 
// For each zero index 0 through 8, runs channel counts 32, 80, 128, 176 and
// 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17876 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17877 
17878 
17879 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channel count 16, equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17888 
// Channel counts that are multiples of 16: 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17899 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17911 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17923 
// Sweeps every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 1; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17934 
// Sweeps every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17945 
// Sweeps every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17957 
// Sweeps every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17969 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17981 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).step(step_value).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17996 
// Width-5 runs with an output stride of 83 over channel counts 1, 16, 31, 46,
// 61 and 76.
//
// The loop's channel count is forwarded to channels(); passing a fixed 16 here
// would leave the loop variable unused and repeat one configuration six times.
// The largest count visited (76) stays below the output stride (83).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py, so the
// generator template needs the same change to keep it from being overwritten.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18009 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18022 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18035 
// Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18047 
// For each zero index 0 through 8, runs channel counts 32, 80, 128, 176 and
// 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18062 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18063 
18064 
18065 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channel count 16, equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
18074 
// Channel counts that are multiples of 16: 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18085 
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18097 
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18109 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18120 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count strictly between 16 and 32: 17 through 31.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18131 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 17 through 31, with qmin set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18143 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 17 through 31, with qmax set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18155 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  // Width of 3 with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18167 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  // For each channel count in 1, 16, ..., 76, try every step from 2 to 9.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18182 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  // Width of 5 with output_stride(83).
  // The loop previously passed a fixed .channels(16), so its six iterations
  // all exercised the same configuration; feed the loop variable through so
  // channel counts 1, 16, 31, 46, 61, 76 are actually covered.
  // NOTE(review): 83 exceeds the largest channel count swept (76), which
  // presumably keeps the stride valid for every iteration - confirm against
  // DWConvMicrokernelTester. This file is generated, so the same change
  // belongs in tools/generate-dwconv-test.py.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18195 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Width of 3, channel counts 1, 16, ..., 76, with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18208 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Width of 3, channel counts 1, 16, ..., 76, with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18221 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples-of-16 channel sweep with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18233 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  // For each zero_index in 0..8, run the multiples-of-16 channel sweep
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18248 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18249 
18250 
18251 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration: channel count equal to the cr value (16).
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18260 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 32, 80, 128, 176, 224: each a multiple of 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18271 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Same multiples-of-16 sweep as c_div_16, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18283 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Same multiples-of-16 sweep as c_div_16, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18295 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18306 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count strictly between 16 and 32: 17 through 31.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18317 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31, with qmin set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18329 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31, with qmax set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18341 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3 with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18353 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  // For each channel count in 1, 16, ..., 76, try every step from 2 to 9.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18368 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  // Width of 5 with output_stride(83).
  // The loop previously passed a fixed .channels(16), so its six iterations
  // all exercised the same configuration; feed the loop variable through so
  // channel counts 1, 16, 31, 46, 61, 76 are actually covered.
  // NOTE(review): 83 exceeds the largest channel count swept (76), which
  // presumably keeps the stride valid for every iteration - confirm against
  // DWConvMicrokernelTester. This file is generated, so the same change
  // belongs in tools/generate-dwconv-test.py.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18381 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3, channel counts 1, 16, ..., 76, with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18394 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3, channel counts 1, 16, ..., 76, with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18407 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  // Multiples-of-16 channel sweep with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18419 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  // For each zero_index in 0..8, run the multiples-of-16 channel sweep
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18434 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18435 
18436 
18437 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration: channel count equal to the cr value (16).
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18446 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 32, 80, 128, 176, 224: each a multiple of 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18457 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Same multiples-of-16 sweep as c_div_16, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18469 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Same multiples-of-16 sweep as c_div_16, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18481 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18492 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count strictly between 16 and 32: 17 through 31.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18503 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31, with qmin set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18515 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31, with qmax set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18527 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3 with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18539 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  // For each channel count in 1, 16, ..., 76, try every step from 2 to 9.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18554 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  // Width of 5 with output_stride(83).
  // The loop previously passed a fixed .channels(16), so its six iterations
  // all exercised the same configuration; feed the loop variable through so
  // channel counts 1, 16, 31, 46, 61, 76 are actually covered.
  // NOTE(review): 83 exceeds the largest channel count swept (76), which
  // presumably keeps the stride valid for every iteration - confirm against
  // DWConvMicrokernelTester. This file is generated, so the same change
  // belongs in tools/generate-dwconv-test.py.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18567 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3, channel counts 1, 16, ..., 76, with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18580 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Width of 3, channel counts 1, 16, ..., 76, with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18593 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  // Multiples-of-16 channel sweep with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18605 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  // For each zero_index in 0..8, run the multiples-of-16 channel sweep
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18620 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18621 
18622 
18623 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  // Single configuration: channel count equal to the cr value (16).
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18632 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 32, 80, 128, 176, 224: each a multiple of 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18643 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Same multiples-of-16 sweep as c_div_16, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18655 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Same multiples-of-16 sweep as c_div_16, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18667 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18678 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count strictly between 16 and 32: 17 through 31.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18689 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 17 through 31, with qmin set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18701 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 17 through 31, with qmax set to 128.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18713 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // Width of 3 with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18725 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // For each channel count in 1, 16, ..., 76, try every step from 2 to 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18740 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // Width of 5 with output_stride(83).
  // The loop previously passed a fixed .channels(16), so its six iterations
  // all exercised the same configuration; feed the loop variable through so
  // channel counts 1, 16, 31, 46, 61, 76 are actually covered.
  // NOTE(review): 83 exceeds the largest channel count swept (76), which
  // presumably keeps the stride valid for every iteration - confirm against
  // DWConvMicrokernelTester. This file is generated, so the same change
  // belongs in tools/generate-dwconv-test.py.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18753 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Width of 3, channel counts 1, 16, ..., 76, with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18766 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Width of 3, channel counts 1, 16, ..., 76, with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18779 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Multiples-of-16 channel sweep with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18791 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  // For each zero_index in 0..24, run the multiples-of-16 channel sweep
  // with input_offset(304).
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18806 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18807 
18808 
18809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), matching the cr(16) setting; kr is 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
18818 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16), cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18829 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18841 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18853 
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18864 
// Every channel count from 17 through 31, i.e. above cr(16) but below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18875 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18887 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18899 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18911 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18926 
// Width-5 runs with output_stride set to 83 for channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable must be forwarded to .channels(): passing a
// constant would repeat one identical configuration six times. 83 is larger
// than the largest channel count swept (76).
// NOTE: this file is generated (see the file header); mirror this change in
// tools/generate-dwconv-test.py so that it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18939 
// Width-3 runs with qmin set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18952 
// Width-3 runs with qmax set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18965 
// Runs with input_offset set to 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18977 
// Sweeps zero_index over 0..24 and, for each value, channel counts 32, 80,
// 128, 176 and 224, always with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18992 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18993 
18994 
18995 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), matching the cr(16) setting; kr is 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
19004 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16), cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19015 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19027 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19039 
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19050 
// Every channel count from 17 through 31, i.e. above cr(16) but below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19061 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19073 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19085 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19097 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19112 
// Width-5 runs with output_stride set to 83 for channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable must be forwarded to .channels(): passing a
// constant would repeat one identical configuration six times. 83 is larger
// than the largest channel count swept (76).
// NOTE: this file is generated (see the file header); mirror this change in
// tools/generate-dwconv-test.py so that it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19125 
// Width-3 runs with qmin set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19138 
// Width-3 runs with qmax set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19151 
// Runs with input_offset set to 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19163 
// Sweeps zero_index over 0..24 and, for each value, channel counts 32, 80,
// 128, 176 and 224, always with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19178 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19179 
19180 
19181 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), matching the cr(16) setting; kr is 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19190 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16), cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19201 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19213 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19225 
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19236 
// Every channel count from 17 through 31, i.e. above cr(16) but below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19247 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19259 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19271 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19283 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19298 
// Width-5 runs with output_stride set to 83 for channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable must be forwarded to .channels(): passing a
// constant would repeat one identical configuration six times. 83 is larger
// than the largest channel count swept (76).
// NOTE: this file is generated (see the file header); mirror this change in
// tools/generate-dwconv-test.py so that it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19311 
// Width-3 runs with qmin set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19324 
// Width-3 runs with qmax set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19337 
// Runs with input_offset set to 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19349 
// Sweeps zero_index over 0..24 and, for each value, channel counts 32, 80,
// 128, 176 and 224, always with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19364 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19365 
19366 
19367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), matching the cr(16) setting; kr is 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19376 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16), cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19387 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19399 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19411 
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19422 
// Every channel count from 17 through 31, i.e. above cr(16) but below 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19433 
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19445 
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19457 
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19469 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 25 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19484 
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
// Previously channels() was given the constant 16, so the loop variable was
// unused and the identical configuration ran six times. The stride of 83 is
// larger than every channel count in the sweep.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in tools/generate-dwconv-test.py -- confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19497 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19510 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19523 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19535 
// For every zero_index 0..24, runs channel counts 32, 80, 128, 176, 224 with
// input_offset(304) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19550 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19551 
19552 
19553 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19562 
// Channel counts 32, 80, 128, 176, 224 -- all multiples of 16 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19573 
// Channel counts 32, 80, 128, 176, 224 with qmin(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19585 
// Channel counts 32, 80, 128, 176, 224 with qmax(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19597 
// Channel counts 1..15, all below the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19608 
// Channel counts 17..31, all above the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19619 
// Channel counts 17..31 with qmin(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19631 
// Channel counts 17..31 with qmax(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19643 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19655 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 25 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19670 
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
// Previously channels() was given the constant 16, so the loop variable was
// unused and the identical configuration ran six times. The stride of 83 is
// larger than every channel count in the sweep.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in tools/generate-dwconv-test.py -- confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19683 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19696 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19709 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19721 
// For every zero_index 0..24, runs channel counts 32, 80, 128, 176, 224 with
// input_offset(304) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19736 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19737 
19738 
19739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19748 
// Channel counts 32, 80, 128, 176, 224 -- all multiples of 16 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19759 
// Channel counts 32, 80, 128, 176, 224 with qmin(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19771 
// Channel counts 32, 80, 128, 176, 224 with qmax(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19783 
// Channel counts 1..15, all below the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19794 
// Channel counts 17..31, all above the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19805 
// Channel counts 17..31 with qmin(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19817 
// Channel counts 17..31 with qmax(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19829 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19841 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 25 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19856 
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
// Previously channels() was given the constant 16, so the loop variable was
// unused and the identical configuration ran six times. The stride of 83 is
// larger than every channel count in the sweep.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in tools/generate-dwconv-test.py -- confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19869 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19882 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19895 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19907 
// For every zero_index 0..24, runs channel counts 32, 80, 128, 176, 224 with
// input_offset(304) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19922 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19923 
19924 
19925 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
19934 
// Channel counts 32, 80, 128, 176, 224 -- all multiples of 16 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19945 
// Channel counts 32, 80, 128, 176, 224 with qmin(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19957 
// Channel counts 32, 80, 128, 176, 224 with qmax(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19969 
// Channel counts 1..15, all below the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19980 
// Channel counts 17..31, all above the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19991 
// Channel counts 17..31 with qmin(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20003 
// Channel counts 17..31 with qmax(128) set on the tester (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20015 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20027 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 25 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
20042 
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76 (cr=16, kr=25).
// Previously channels() was given the constant 16, so the loop variable was
// unused and the identical configuration ran six times. The stride of 83 is
// larger than every channel count in the sweep.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in tools/generate-dwconv-test.py -- confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20055 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20068 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20081 
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20093 
// For every zero_index 0..24, runs channel counts 32, 80, 128, 176, 224 with
// input_offset(304) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
20108 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20109 
20110 
20111 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
20120 
// Channel counts 32, 80, 128, 176, 224 -- all multiples of 16 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20131 
// Channel counts 32, 80, 128, 176, 224 with qmin(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20143 
// Channel counts 32, 80, 128, 176, 224 with qmax(128) set (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
20155 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count from 1 through 15, i.e. all values below 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20166 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count from 17 through 31, i.e. strictly between 16 and 32.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20177 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20189 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20201 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  // width(3) with channel counts 1, 16, 31, 46, 61 and 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20213 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  // width(3) with channel counts 1, 16, 31, 46, 61 and 76; for each of them
  // step() takes every value from 2 through 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_val = 2; step_val <= 25; ++step_val) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(step_val)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20228 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  // width(5) with output_stride(83), swept over channel counts
  // 1, 16, 31, 46, 61 and 76.
  //
  // The loop variable is forwarded to channels() so that each iteration
  // exercises a different channel count; passing a constant here would run
  // the identical configuration six times.
  // NOTE(review): 83 is larger than every channel count visited (max 76);
  // confirm the tester only requires output_stride >= channels.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20241 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // width(3) and qmin(128) with channel counts 1, 16, 31, 46, 61 and 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20254 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // width(3) and qmax(128) with channel counts 1, 16, 31, 46, 61 and 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20267 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  // Multiples of 16 (32, 80, 128, 176, 224) with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20279 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  // For each zero_index from 0 through 24, sweep channel counts
  // 32, 80, 128, 176 and 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20294 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20295 
20296 
20297 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  // Single run with exactly 24 channels, the same value that is passed to cr().
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20306 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264 and 336: all multiples of 24.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20317 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmin set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20329 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmax set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20341 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20352 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20363 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20375 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20387 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20399 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116; for each of them
  // step() takes every value from 2 through 9.
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t step_val = 2; step_val <= 9; ++step_val) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).width(3).step(step_val)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20414 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // width(5) with output_stride(127), swept over channel counts
  // 1, 24, 47, 70, 93 and 116.
  //
  // The loop variable is forwarded to channels() so that each iteration
  // exercises a different channel count; passing a constant here would run
  // the identical configuration six times.
  // NOTE(review): 127 is larger than every channel count visited (max 116);
  // confirm the tester only requires output_stride >= channels.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20427 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // width(3) and qmin(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20440 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // width(3) and qmax(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20453 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with input_offset set to 464.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20465 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  // For each zero_index from 0 through 8, sweep channel counts
  // 48, 120, 192, 264 and 336 with input_offset set to 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20480 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20481 
20482 
20483 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  // Single run with exactly 24 channels, the same value that is passed to cr().
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20492 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264 and 336: all multiples of 24.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20503 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmin set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20515 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmax set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20527 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20538 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20549 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20561 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20573 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20585 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116; for each of them
  // step() takes every value from 2 through 9.
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t step_val = 2; step_val <= 9; ++step_val) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).width(3).step(step_val)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20600 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // width(5) with output_stride(127), swept over channel counts
  // 1, 24, 47, 70, 93 and 116.
  //
  // The loop variable is forwarded to channels() so that each iteration
  // exercises a different channel count; passing a constant here would run
  // the identical configuration six times.
  // NOTE(review): 127 is larger than every channel count visited (max 116);
  // confirm the tester only requires output_stride >= channels.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20613 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // width(3) and qmin(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20626 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // width(3) and qmax(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20639 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 24 (48, 120, 192, 264, 336) with input_offset set to 464.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20651 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  // For each zero_index from 0 through 8, sweep channel counts
  // 48, 120, 192, 264 and 336 with input_offset set to 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20666 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20667 
20668 
20669 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with exactly 24 channels, the same value that is passed to cr().
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
}
20678 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 48, 120, 192, 264 and 336: all multiples of 24.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20689 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmin set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20701 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 24 (48, 120, 192, 264, 336) with qmax set to 128.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20713 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20724 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20735 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20747 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20759 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20771 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  // width(3) with channel counts 1, 24, 47, 70, 93 and 116; for each of them
  // step() takes every value from 2 through 9.
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t step_val = 2; step_val <= 9; ++step_val) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).width(3).step(step_val)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20786 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  // width(5) with output_stride(127), swept over channel counts
  // 1, 24, 47, 70, 93 and 116.
  //
  // The loop variable is forwarded to channels() so that each iteration
  // exercises a different channel count; passing a constant here would run
  // the identical configuration six times.
  // NOTE(review): 127 is larger than every channel count visited (max 116);
  // confirm the tester only requires output_stride >= channels.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
20799 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // width(3) and qmin(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20812 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // width(3) and qmax(128) with channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20825 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 24 (48, 120, 192, 264, 336) with input_offset set to 464.
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
20837 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  // For each zero_index from 0 through 8, sweep channel counts
  // 48, 120, 192, 264 and 336 with input_offset set to 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20852 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20853 
20854 
20855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (24), with kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20864 
// Channel counts 48, 120, 192, 264, 336 -- all multiples of cr=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20875 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20887 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20899 
// Every channel count below cr=24 (1..23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20910 
// Every channel count strictly between cr=24 and 2*cr=48 (25..47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20921 
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20933 
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20945 
// width(3) over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20957 
// width(3) with every step value 2..9, for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20972 
// width(5) with output_stride(127), over channel counts 1, 24, 47, 70, 93, 116.
// The swept channel count is passed to the tester (rather than a fixed 24) so
// that each iteration exercises a distinct configuration; every swept value
// stays below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20985 
// width(3) with qmin(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20998 
// width(3) with qmax(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21011 
// input_offset(464) over channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21023 
// Every zero_index in 0..8 (one per kr=9 position), each combined with
// input_offset(464) and channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21038 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21039 
21040 
21041 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (24), with kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21050 
// Channel counts 48, 120, 192, 264, 336 -- all multiples of cr=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21061 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21073 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21085 
// Every channel count below cr=24 (1..23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21096 
// Every channel count strictly between cr=24 and 2*cr=48 (25..47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21107 
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21119 
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21131 
// width(3) over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21143 
// width(3) with every step value 2..25, for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21158 
// width(5) with output_stride(127), over channel counts 1, 24, 47, 70, 93, 116.
// The swept channel count is passed to the tester (rather than a fixed 24) so
// that each iteration exercises a distinct configuration; every swept value
// stays below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21171 
// width(3) with qmin(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21184 
// width(3) with qmax(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21197 
// input_offset(464) over channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21209 
// Every zero_index in 0..24 (one per kr=25 position), each combined with
// input_offset(464) and channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21224 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21225 
21226 
21227 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (24), with kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21236 
// Channel counts 48, 120, 192, 264, 336 -- all multiples of cr=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21247 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21259 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21271 
// Every channel count below cr=24 (1..23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21282 
// Every channel count strictly between cr=24 and 2*cr=48 (25..47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21293 
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21305 
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21317 
// width(3) over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21329 
// width(3) with every step value 2..25, for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21344 
// width(5) with output_stride(127), over channel counts 1, 24, 47, 70, 93, 116.
// The swept channel count is passed to the tester (rather than a fixed 24) so
// that each iteration exercises a distinct configuration; every swept value
// stays below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21357 
// width(3) with qmin(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21370 
// width(3) with qmax(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21383 
// input_offset(464) over channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21395 
// Every zero_index in 0..24 (one per kr=25 position), each combined with
// input_offset(464) and channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21410 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21411 
21412 
21413 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (24), with kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21422 
// Channel counts 48, 120, 192, 264, 336 -- all multiples of cr=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21433 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21445 
// Multiples of cr=24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21457 
// Every channel count below cr=24 (1..23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21468 
// Every channel count strictly between cr=24 and 2*cr=48 (25..47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21479 
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21491 
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21503 
// width(3) over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21515 
// width(3) with every step value 2..25, for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21530 
// width(5) with output_stride(127), over channel counts 1, 24, 47, 70, 93, 116.
// The swept channel count is passed to the tester (rather than a fixed 24) so
// that each iteration exercises a distinct configuration; every swept value
// stays below the 127 output stride.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
21543 
// width(3) with qmin(128), over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21556 
// Width-3 runs with qmax(128) for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21569 
// Runs with input_offset(464) for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21581 
// For every zero_index from 0 to 24, runs with input_offset(464) for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21596 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21597 
21598 
21599 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (24) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester()
    .cr(24).kr(25)
    .channels(24)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21608 
// Runs for channels = 48, 120, 192, 264, 336 (each a multiple of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21619 
// Runs with qmin(128) for channels = 48, 120, 192, 264, 336 (each a multiple of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21631 
// Runs with qmax(128) for channels = 48, 120, 192, 264, 336 (each a multiple of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21643 
// Runs for every channel count from 1 to 23 (all below cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21654 
// Runs for every channel count from 25 to 47 (above cr = 24, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21665 
// Runs with qmin(128) for every channel count from 25 to 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21677 
// Runs with qmax(128) for every channel count from 25 to 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21689 
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21701 
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116 combined with every step from 2 to 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21716 
// Width-5 runs with output_stride 127 for channels = 1, 24, 47, 70, 93, 116.
// The loop variable is passed to channels(); with channels pinned to 24 every iteration
// repeated one identical configuration. The largest count (116) stays below the stride (127).
// NOTE: this file is auto-generated, so tools/generate-dwconv-test.py needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21729 
// Width-3 runs with qmin(128) for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21742 
// Width-3 runs with qmax(128) for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21755 
// Runs with input_offset(464) for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21767 
// For every zero_index from 0 to 24, runs with input_offset(464) for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21782 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21783 
21784 
21785 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32) and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester()
    .cr(32).kr(9)
    .channels(32)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21794 
// Runs for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21805 
// Runs with qmin(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21817 
// Runs with qmax(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21829 
// Runs for every channel count from 1 to 31 (all below cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21840 
// Runs for every channel count from 33 to 63 (above cr = 32, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21851 
// Runs with qmin(128) for every channel count from 33 to 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21863 
// Runs with qmax(128) for every channel count from 33 to 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21875 
// Width-3 runs for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21887 
// Width-3 runs for channels = 1, 32, 63, 94, 125, 156 combined with every step from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21902 
// Width-5 runs with output_stride 163 for channels = 1, 32, 63, 94, 125, 156.
// The loop variable is passed to channels(); with channels pinned to 32 every iteration
// repeated one identical configuration. The largest count (156) stays below the stride (163).
// NOTE: this file is auto-generated, so tools/generate-dwconv-test.py needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
21915 
// Width-3 runs with qmin(128) for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21928 
// Width-3 runs with qmax(128) for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21941 
// Runs with input_offset(592) for channels = 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21953 
// For every zero_index from 0 to 8, runs with input_offset(592) for
// channels = 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21968 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21969 
21970 
21971 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32) and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester()
    .cr(32).kr(9)
    .channels(32)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21980 
// Runs for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21991 
// Runs with qmin(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22003 
// Runs with qmax(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22015 
// Runs for every channel count from 1 to 31 (all below cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22026 
// Runs for every channel count from 33 to 63 (above cr = 32, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22037 
// Runs with qmin(128) for every channel count from 33 to 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22049 
// Runs with qmax(128) for every channel count from 33 to 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22061 
// Width-3 runs for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22073 
// Width-3 runs for channels = 1, 32, 63, 94, 125, 156 combined with every step from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22088 
// Width-5 runs with output_stride 163 for channels = 1, 32, 63, 94, 125, 156.
// The loop variable is passed to channels(); with channels pinned to 32 every iteration
// repeated one identical configuration. The largest count (156) stays below the stride (163).
// NOTE: this file is auto-generated, so tools/generate-dwconv-test.py needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22101 
// Width-3 runs with qmin(128) for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22114 
// Width-3 runs with qmax(128) for channels = 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22127 
// Runs with input_offset(592) for channels = 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22139 
// For every zero_index from 0 to 8, runs with input_offset(592) for
// channels = 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester()
        .cr(32).kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22154 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22155 
22156 
22157 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32) and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester()
    .cr(32).kr(9)
    .channels(32)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
22166 
// Runs for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22177 
// Runs with qmin(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22189 
// Runs with qmax(128) for channels = 64, 160, 256, 352, 448 (each a multiple of cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22201 
// Runs for every channel count from 1 to 31 (all below cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22212 
// Runs for every channel count from 33 to 63 (above cr = 32, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22223 
// Runs with qmin(128) for every channel count from 33 to 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22235 
// Every channel count from 33 through 63 with cr=32, kr=9 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22247 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156 with cr=32, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22259 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 9 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22274 
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156 (cr=32, kr=9). The loop variable is forwarded to channels() so each
// iteration exercises a different channel count; with a hard-coded
// channels(32) all six iterations would run the same configuration. Every
// swept value stays below the output stride of 163.
// NOTE: this file is generated (tools/generate-dwconv-test.py); the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22287 
// width(3) runs with qmin(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22300 
// width(3) runs with qmax(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22313 
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22325 
// For each zero_index from 0 through 8, runs channel counts 64, 160, 256, 352
// and 448 with input_offset(592) (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22340 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22341 
22342 
22343 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(32), matching cr=32 (kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(9).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22352 
// Channel counts 64, 160, 256, 352 and 448 (all multiples of 32) with cr=32, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22363 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=9 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22375 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=9 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22387 
// Every channel count from 1 through 31 (below 32) with cr=32, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22398 
// Every channel count from 33 through 63 (above 32, below 64) with cr=32, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22409 
// Every channel count from 33 through 63 with cr=32, kr=9 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22421 
// Every channel count from 33 through 63 with cr=32, kr=9 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22433 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156 with cr=32, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22445 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 9 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22460 
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156 (cr=32, kr=9). The loop variable is forwarded to channels() so each
// iteration exercises a different channel count; with a hard-coded
// channels(32) all six iterations would run the same configuration. Every
// swept value stays below the output stride of 163.
// NOTE: this file is generated (tools/generate-dwconv-test.py); the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22473 
// width(3) runs with qmin(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22486 
// width(3) runs with qmax(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22499 
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448 (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22511 
// For each zero_index from 0 through 8, runs channel counts 64, 160, 256, 352
// and 448 with input_offset(592) (cr=32, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22526 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22527 
22528 
22529 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(32), matching cr=32 (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22538 
// Channel counts 64, 160, 256, 352 and 448 (all multiples of 32) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22549 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=25 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22561 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=25 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22573 
// Every channel count from 1 through 31 (below 32) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22584 
// Every channel count from 33 through 63 (above 32, below 64) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22595 
// Every channel count from 33 through 63 with cr=32, kr=25 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22607 
// Every channel count from 33 through 63 with cr=32, kr=25 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22619 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156 with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22631 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 25 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22646 
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156 (cr=32, kr=25). The loop variable is forwarded to channels() so each
// iteration exercises a different channel count; with a hard-coded
// channels(32) all six iterations would run the same configuration. Every
// swept value stays below the output stride of 163.
// NOTE: this file is generated (tools/generate-dwconv-test.py); the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22659 
// width(3) runs with qmin(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22672 
// width(3) runs with qmax(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22685 
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22697 
// For each zero_index from 0 through 24, runs channel counts 64, 160, 256, 352
// and 448 with input_offset(592) (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22712 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22713 
22714 
22715 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(32), matching cr=32 (kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22724 
// Channel counts 64, 160, 256, 352 and 448 (all multiples of 32) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22735 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=25 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22747 
// Channel counts 64, 160, 256, 352 and 448 with cr=32, kr=25 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22759 
// Every channel count from 1 through 31 (below 32) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22770 
// Every channel count from 33 through 63 (above 32, below 64) with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22781 
// Every channel count from 33 through 63 with cr=32, kr=25 and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22793 
// Every channel count from 33 through 63 with cr=32, kr=25 and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22805 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156 with cr=32, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22817 
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 25 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22832 
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156 (cr=32, kr=25). The loop variable is forwarded to channels() so each
// iteration exercises a different channel count; with a hard-coded
// channels(32) all six iterations would run the same configuration. Every
// swept value stays below the output stride of 163.
// NOTE: this file is generated (tools/generate-dwconv-test.py); the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22845 
// width(3) runs with qmin(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22858 
// width(3) runs with qmax(128) for channel counts 1, 32, 63, 94, 125 and 156 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22871 
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448 (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22883 
// For each zero_index from 0 through 24, runs channel counts 64, 160, 256, 352
// and 448 with input_offset(592) (cr=32, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22898 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22899 
22900 
22901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  // Single configuration: the channel count equals the cr value (32).
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
22910 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448: each one is a multiple of 32.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22921 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmin set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22933 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmax set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22945 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count below the cr value: 1 through 31.
  for (uint32_t num_channels = 1; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22956 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts strictly between one and two multiples of cr: 33 through 63.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22967 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 33 through 63 with qmin set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22979 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 33 through 63 with qmax set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
22991 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23003 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs: channel counts 1, 32, 63, 94, 125, 156 crossed with step values 2 through 25.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23018 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  // Width-5 runs with output_stride 163 for channel counts 1, 32, 63, 94, 125, 156.
  // channels() must receive the loop variable: a fixed channels(32) would run the
  // same configuration six times and leave the other counts untested. Every swept
  // count is smaller than the 163-element output stride.
  // NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
  // template needs the same change or a regeneration will undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
23031 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmin set to 128, for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23044 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmax set to 128, for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23057 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (all multiples of 32) with input_offset set to 592.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23069 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  // Every zero_index in [0, 25) is combined with channel counts 64, 160, 256, 352, 448
  // and input_offset 592.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23084 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23085 
23086 
23087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  // Single configuration: the channel count equals the cr value (32).
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
23096 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448: each one is a multiple of 32.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23107 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmin set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23119 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (multiples of 32) with qmax set to 128.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23131 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count below the cr value: 1 through 31.
  for (uint32_t num_channels = 1; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23142 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts strictly between one and two multiples of cr: 33 through 63.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23153 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 33 through 63 with qmin set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23165 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 33 through 63 with qmax set to 128.
  for (uint32_t num_channels = 33; num_channels < 64; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23177 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23189 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs: channel counts 1, 32, 63, 94, 125, 156 crossed with step values 2 through 25.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23204 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  // Width-5 runs with output_stride 163 for channel counts 1, 32, 63, 94, 125, 156.
  // channels() must receive the loop variable: a fixed channels(32) would run the
  // same configuration six times and leave the other counts untested. Every swept
  // count is smaller than the 163-element output stride.
  // NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
  // template needs the same change or a regeneration will undo it.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
23217 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmin set to 128, for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23230 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmax set to 128, for channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23243 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 64, 160, 256, 352, 448 (all multiples of 32) with input_offset set to 592.
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
23255 
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  // Every zero_index in [0, 25) is combined with channel counts 64, 160, 256, 352, 448
  // and input_offset 592.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23270 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23271 
23272 
23273 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single configuration: the channel count equals the cr value (16).
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx512_params,
    xnn_qs8_requantize_fp32);
}
23282 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224: each one is a multiple of 16.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23293 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23305 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23317 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23328 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts strictly between one and two multiples of cr: 17 through 31.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23339 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23351 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23363 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs for channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23375 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs: channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 9.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23390 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-5 runs with output_stride 83 for channel counts 1, 16, 31, 46, 61, 76.
  // channels() must receive the loop variable: a fixed channels(16) would run the
  // same configuration six times and leave the other counts untested. Every swept
  // count is smaller than the 83-element output stride.
  // NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
  // template needs the same change or a regeneration will undo it.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23403 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs with qmin set to 128, for channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23416 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs with qmax set to 128, for channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23429 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224 (all multiples of 16) with input_offset set to 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23441 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every zero_index in [0, 9) is combined with channel counts 32, 80, 128, 176, 224
  // and input_offset 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23456 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23457 
23458 
23459 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single configuration: the channel count equals the cr value (16).
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx512_params,
    xnn_qs8_requantize_fp32);
}
23468 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224: each one is a multiple of 16.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23479 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23491 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23503 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count below the cr value: 1 through 15.
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23514 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts strictly between one and two multiples of cr: 17 through 31.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23525 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23537 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23549 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs for channel counts 1, 16, 31, 46, 61, 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23561 
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Width-3 runs: channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 25.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23576 
// up16x25 AVX512SKX mul32 kernel: width 5 with an output stride of 83.
// The loop variable is now passed to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the identical configuration.
// The largest channel count visited is 76, which stays below the stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23589 
// up16x25 AVX512SKX mul32 kernel: width 3 with qmin set to 128, channel counts 1..80 in steps of 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23602 
// up16x25 AVX512SKX mul32 kernel: width 3 with qmax set to 128, channel counts 1..80 in steps of 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23615 
// up16x25 AVX512SKX mul32 kernel: input_offset of 304, channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23627 
// up16x25 AVX512SKX mul32 kernel: every zero_index in 0..24, combined with input_offset 304
// and channel counts 32..224 in steps of 48.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(z);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23642 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23643 
23644 
23645 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up32x3 AVX512SKX mul32 kernel: single run with exactly 32 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(3).channels(32);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx512_params,
    xnn_qs8_requantize_fp32);
}
23654 
// up32x3 AVX512SKX mul32 kernel: channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23665 
// up32x3 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23677 
// up32x3 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23689 
// up32x3 AVX512SKX mul32 kernel: every channel count in 1..31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23700 
// up32x3 AVX512SKX mul32 kernel: every channel count in 33..63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23711 
// up32x3 AVX512SKX mul32 kernel: every channel count in 33..63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23723 
// up32x3 AVX512SKX mul32 kernel: every channel count in 33..63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23735 
// up32x3 AVX512SKX mul32 kernel: width 3, channel counts 1, 32, 63, ... up to 160.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23747 
// up32x3 AVX512SKX mul32 kernel: width 3, step values 2 and 3 for every sampled channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23762 
// up32x3 AVX512SKX mul32 kernel: width 5 with an output stride of 163.
// The loop variable is now passed to channels(); previously channels(32) was
// hard-coded, so all six iterations ran the identical configuration.
// The largest channel count visited is 156, which stays below the stride of 163.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23775 
// up32x3 AVX512SKX mul32 kernel: width 3 with qmin set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23788 
// up32x3 AVX512SKX mul32 kernel: width 3 with qmax set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23801 
// up32x3 AVX512SKX mul32 kernel: input_offset of 592, channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(3).channels(c).input_offset(592);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23813 
// up32x3 AVX512SKX mul32 kernel: every zero_index in 0..2, combined with input_offset 592
// and channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t z = 0; z < 3; ++z) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(3).channels(c).input_offset(592).zero_index(z);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23828 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23829 
23830 
23831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up32x9 AVX512SKX mul32 kernel: single run with exactly 32 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(9).channels(32);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx512_params,
    xnn_qs8_requantize_fp32);
}
23840 
// up32x9 AVX512SKX mul32 kernel: channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23851 
// up32x9 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23863 
// up32x9 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23875 
// up32x9 AVX512SKX mul32 kernel: every channel count in 1..31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23886 
// up32x9 AVX512SKX mul32 kernel: every channel count in 33..63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23897 
// up32x9 AVX512SKX mul32 kernel: every channel count in 33..63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23909 
// up32x9 AVX512SKX mul32 kernel: every channel count in 33..63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23921 
// up32x9 AVX512SKX mul32 kernel: width 3, channel counts 1, 32, 63, ... up to 160.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23933 
// up32x9 AVX512SKX mul32 kernel: width 3, each step value in 2..9 for every sampled channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23948 
// up32x9 AVX512SKX mul32 kernel: width 5 with an output stride of 163.
// The loop variable is now passed to channels(); previously channels(32) was
// hard-coded, so all six iterations ran the identical configuration.
// The largest channel count visited is 156, which stays below the stride of 163.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23961 
// up32x9 AVX512SKX mul32 kernel: width 3 with qmin set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23974 
// up32x9 AVX512SKX mul32 kernel: width 3 with qmax set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23987 
// up32x9 AVX512SKX mul32 kernel: input_offset of 592, channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
23999 
// up32x9 AVX512SKX mul32 kernel: every zero_index in 0..8, combined with input_offset 592
// and channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t z = 0; z < 9; ++z) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(z);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24014 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24015 
24016 
24017 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up32x25 AVX512SKX mul32 kernel: single run with exactly 32 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
    xnn_init_qc8_conv_minmax_fp32_avx512_params,
    xnn_qs8_requantize_fp32);
}
24026 
// up32x25 AVX512SKX mul32 kernel: channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24037 
// up32x25 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24049 
// up32x25 AVX512SKX mul32 kernel: multiples of 32 channels (64..448), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24061 
// up32x25 AVX512SKX mul32 kernel: every channel count in 1..31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24072 
// up32x25 AVX512SKX mul32 kernel: every channel count in 33..63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24083 
// up32x25 AVX512SKX mul32 kernel: every channel count in 33..63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24095 
// up32x25 AVX512SKX mul32 kernel: every channel count in 33..63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24107 
// up32x25 AVX512SKX mul32 kernel: width 3, channel counts 1, 32, 63, ... up to 160.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24119 
// up32x25 AVX512SKX mul32 kernel: width 3, each step value in 2..25 for every sampled channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24134 
// up32x25 AVX512SKX mul32 kernel: width 5 with an output stride of 163.
// The loop variable is now passed to channels(); previously channels(32) was
// hard-coded, so all six iterations ran the identical configuration.
// The largest channel count visited is 156, which stays below the stride of 163.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
24147 
// up32x25 AVX512SKX mul32 kernel: width 3 with qmin set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24160 
// up32x25 AVX512SKX mul32 kernel: width 3 with qmax set to 128, channel counts 1..160 in steps of 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24173 
// up32x25 AVX512SKX mul32 kernel: input_offset of 592, channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(c).input_offset(592);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
  }
}
24185 
// up32x25 AVX512SKX mul32 kernel: every zero_index in 0..24, combined with input_offset 592
// and channel counts 64..448 in steps of 96.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(z);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24200 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24201 
24202 
24203 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// up8x9 WASMSIMD mul16 kernel: single run with exactly 8 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(9).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
24211 
// up8x9 WASMSIMD mul16 kernel: channel counts 16, 40, 64, 88, 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24221 
// up8x9 WASMSIMD mul16 kernel: multiples of 8 channels (16..112), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24232 
// up8x9 WASMSIMD mul16 kernel: multiples of 8 channels (16..112), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24243 
// up8x9 WASMSIMD mul16 kernel: every channel count in 1..7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24253 
// Runs the kernel for every channel count from 9 through 15, i.e. more than
// cr = 8 but less than twice cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24263 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24274 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24285 
// Runs the kernel with width(3) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24296 
// For channel counts 1, 8, 15, 22, 29 and 36, runs the kernel with width(3)
// and every step value from 2 through 9 (9 == kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24310 
// Runs the kernel with width(5) and output_stride(43) for channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to channels() so that each iteration covers
// a different channel count; with a hard-coded channels(8) the loop would
// merely repeat one identical configuration six times. The stride of 43 is
// larger than the largest channel count in the sweep (36).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24322 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24334 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24346 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24357 
// For every zero_index from 0 through 8 (one per kr = 9 position), runs the
// kernel with input_offset(176) over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24371 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24372 
24373 
24374 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up8x9 wasmsimd_mul16_add16 kernel with channels equal to
// cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_eq_8) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
24382 
// Runs the up8x9 wasmsimd_mul16_add16 kernel for channel counts 16, 40, 64,
// 88 and 112, all of which are multiples of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24392 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmin(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24403 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmax(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24414 
// Runs the kernel for every channel count from 1 through 7, i.e. fewer
// channels than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_lt_8) {
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24424 
// Runs the kernel for every channel count from 9 through 15, i.e. more than
// cr = 8 but less than twice cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24434 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24445 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24456 
// Runs the kernel with width(3) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24467 
// For channel counts 1, 8, 15, 22, 29 and 36, runs the kernel with width(3)
// and every step value from 2 through 9 (9 == kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24481 
// Runs the kernel with width(5) and output_stride(43) for channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to channels() so that each iteration covers
// a different channel count; with a hard-coded channels(8) the loop would
// merely repeat one identical configuration six times. The stride of 43 is
// larger than the largest channel count in the sweep (36).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24493 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24505 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24517 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24528 
// For every zero_index from 0 through 8 (one per kr = 9 position), runs the
// kernel with input_offset(176) over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24542 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24543 
24544 
24545 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up8x25 wasmsimd_mul16 kernel with channels equal to
// cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
24553 
// Runs the up8x25 wasmsimd_mul16 kernel for channel counts 16, 40, 64, 88 and
// 112, all of which are multiples of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24563 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmin(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24574 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmax(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24585 
// Runs the kernel for every channel count from 1 through 7, i.e. fewer
// channels than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24595 
// Runs the kernel for every channel count from 9 through 15, i.e. more than
// cr = 8 but less than twice cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24605 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24616 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24627 
// Runs the kernel with width(3) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24638 
// For channel counts 1, 8, 15, 22, 29 and 36, runs the kernel with width(3)
// and every step value from 2 through 25 (25 == kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24652 
// Runs the kernel with width(5) and output_stride(43) for channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to channels() so that each iteration covers
// a different channel count; with a hard-coded channels(8) the loop would
// merely repeat one identical configuration six times. The stride of 43 is
// larger than the largest channel count in the sweep (36).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24664 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24676 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24688 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24699 
// For every zero_index from 0 through 24 (one per kr = 25 position), runs the
// kernel with input_offset(176) over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24713 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24714 
24715 
24716 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up8x25 wasmsimd_mul16_add16 kernel with channels equal to
// cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_eq_8) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
24724 
// Runs the up8x25 wasmsimd_mul16_add16 kernel for channel counts 16, 40, 64,
// 88 and 112, all of which are multiples of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24734 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmin(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24745 
// Same channel sweep as c_div_8 (16, 40, 64, 88, 112) with qmax(128) set on
// the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24756 
// Runs the kernel for every channel count from 1 through 7, i.e. fewer
// channels than cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_lt_8) {
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24766 
// Runs the kernel for every channel count from 9 through 15, i.e. more than
// cr = 8 but less than twice cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24776 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24787 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24798 
// Runs the kernel with width(3) for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24809 
// For channel counts 1, 8, 15, 22, 29 and 36, runs the kernel with width(3)
// and every step value from 2 through 25 (25 == kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24823 
// Runs the kernel with width(5) and output_stride(43) for channel counts
// 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to channels() so that each iteration covers
// a different channel count; with a hard-coded channels(8) the loop would
// merely repeat one identical configuration six times. The stride of 43 is
// larger than the largest channel count in the sweep (36).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24835 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24847 
// Channel counts 1, 8, 15, 22, 29 and 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24859 
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24870 
// For every zero_index from 0 through 24 (one per kr = 25 position), runs the
// kernel with input_offset(176) over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
24884 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24885 
24886 
24887 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up16x3 wasmsimd_mul16_add16 kernel with channels equal to
// cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_eq_16) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(3).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
24895 
// Runs the up16x3 wasmsimd_mul16_add16 kernel for channel counts 32, 80, 128,
// 176 and 224, all of which are multiples of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(3).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
24905 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24916 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24927 
// Every channel count from 1 through 15 (below cr=16) with kr=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24937 
// Every channel count from 17 through 31 (above cr=16) with kr=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24947 
// Channel counts 17 through 31 with cr=16, kr=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24958 
// Channel counts 17 through 31 with cr=16, kr=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24969 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with cr=16, kr=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
24980 
// width=3 runs for channels 1, 16, 31, 46, 61, 76, each repeated with
// step values 2 and 3 (cr=16, kr=3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24994 
// width=5 runs with output_stride=83 for channels 1, 16, 31, 46, 61, 76
// (cr=16, kr=3). The loop variable is forwarded to channels(); it was
// previously ignored in favour of a fixed 16, so every iteration repeated the
// same configuration. output_stride (83) is larger than the biggest swept
// channel count (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25006 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmin set to 128
// (cr=16, kr=3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25018 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmax set to 128
// (cr=16, kr=3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25030 
// Channels 32, 80, 128, 176, 224 with input_offset=304 (cr=16, kr=3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).input_offset(304)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25041 
// For every zero_index in [0, 3) and channels in {32, 80, 128, 176, 224},
// runs with cr=16, kr=3 and input_offset=304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(c).input_offset(304).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25055 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25056 
25057 
25058 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up16x9 mul16 kernel with cr=16, kr=9, channels=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
25066 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// cr=16, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25076 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25087 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=9 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25098 
// Every channel count from 1 through 15 (below cr=16) with kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25108 
// Every channel count from 17 through 31 (above cr=16) with kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25118 
// Channel counts 17 through 31 with cr=16, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25129 
// Channel counts 17 through 31 with cr=16, kr=9 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25140 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with cr=16, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25151 
// width=3 runs for channels 1, 16, 31, 46, 61, 76, each repeated with
// step values 2 through 9 (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25165 
// width=5 runs with output_stride=83 for channels 1, 16, 31, 46, 61, 76
// (cr=16, kr=9). The loop variable is forwarded to channels(); it was
// previously ignored in favour of a fixed 16, so every iteration repeated the
// same configuration. output_stride (83) is larger than the biggest swept
// channel count (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25177 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmin set to 128
// (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25189 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmax set to 128
// (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25201 
// Channels 32, 80, 128, 176, 224 with input_offset=304 (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25212 
// For every zero_index in [0, 9) and channels in {32, 80, 128, 176, 224},
// runs with cr=16, kr=9 and input_offset=304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25226 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25227 
25228 
25229 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up16x9 mul16_add16 kernel with cr=16, kr=9, channels=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
25237 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// cr=16, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25247 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25258 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=9 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25269 
// Every channel count from 1 through 15 (below cr=16) with kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25279 
// Every channel count from 17 through 31 (above cr=16) with kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25289 
// Channel counts 17 through 31 with cr=16, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25300 
// Channel counts 17 through 31 with cr=16, kr=9 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25311 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with cr=16, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25322 
// width=3 runs for channels 1, 16, 31, 46, 61, 76, each repeated with
// step values 2 through 9 (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25336 
// width=5 runs with output_stride=83 for channels 1, 16, 31, 46, 61, 76
// (cr=16, kr=9). The loop variable is forwarded to channels(); it was
// previously ignored in favour of a fixed 16, so every iteration repeated the
// same configuration. output_stride (83) is larger than the biggest swept
// channel count (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25348 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmin set to 128
// (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25360 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmax set to 128
// (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25372 
// Channels 32, 80, 128, 176, 224 with input_offset=304 (cr=16, kr=9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25383 
// For every zero_index in [0, 9) and channels in {32, 80, 128, 176, 224},
// runs with cr=16, kr=9 and input_offset=304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25397 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25398 
25399 
25400 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up16x25 mul16 kernel with cr=16, kr=25, channels=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16)
      .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
25408 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25418 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=25 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25429 
// Channels 32, 80, 128, 176, 224 with cr=16, kr=25 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25440 
// Every channel count from 1 through 15 (below cr=16) with kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25450 
// Every channel count from 17 through 31 (above cr=16) with kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25460 
// Channel counts 17 through 31 with cr=16, kr=25 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25471 
// Channel counts 17 through 31 with cr=16, kr=25 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25482 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with cr=16, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25493 
// width=3 runs for channels 1, 16, 31, 46, 61, 76, each repeated with
// step values 2 through 25 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25507 
// width=5 runs with output_stride=83 for channels 1, 16, 31, 46, 61, 76
// (cr=16, kr=25). The loop variable is forwarded to channels(); it was
// previously ignored in favour of a fixed 16, so every iteration repeated the
// same configuration. output_stride (83) is larger than the biggest swept
// channel count (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25519 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmin set to 128
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25531 
// width=3 runs for channels 1, 16, 31, 46, 61, 76 with qmax set to 128
// (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25543 
// Channels 32, 80, 128, 176, 224 with input_offset=304 (cr=16, kr=25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304)
        .Test(
            xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
25554 
// For every zero_index in 0..24: multiples of 16 channels (32, 80, 128, 176, 224)
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
25568 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25569 
25570 
25571 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to the cr setting (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_eq_16) {
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
25579 
// Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25589 
// Multiples of 16 channels (32, 80, 128, 176, 224) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25600 
// Multiples of 16 channels (32, 80, 128, 176, 224) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25611 
// Every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25621 
// Every channel count strictly between 16 and 32 (17 through 31).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25631 
// Channel counts 17 through 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25642 
// Channel counts 17 through 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25653 
// width(3) runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25664 
// width(3) runs over channel counts 1, 16, 31, 46, 61, 76, each repeated for
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
25678 
// width(5) runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76.
// Fix: the loop variable was previously unused (the body hard-coded
// channels(16)), so the identical configuration ran six times. It now feeds
// .channels(); the stride of 83 stays above the largest swept count (76).
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py -- mirror this change in the template,
// otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25690 
// width(3) runs over channel counts 1, 16, 31, 46, 61, 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25702 
// width(3) runs over channel counts 1, 16, 31, 46, 61, 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25714 
// Multiples of 16 channels (32, 80, 128, 176, 224) with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25725 
// For every zero_index in 0..24: multiples of 16 channels (32, 80, 128, 176, 224)
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
25739 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25740 
25741 
25742 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to the cr setting (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
  DWConvMicrokernelTester tester{};
  tester.cr(24).kr(9).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
25750 
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25760 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25771 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25782 
// Every channel count below 24 (1 through 23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25792 
// Every channel count strictly between 24 and 48 (25 through 47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25802 
// Channel counts 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25813 
// Channel counts 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25824 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25835 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
25849 
// width(5) runs with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
// Fix: the loop variable was previously unused (the body hard-coded
// channels(24)), so the identical configuration ran six times. It now feeds
// .channels(); the stride of 127 stays above the largest swept count (116).
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py -- mirror this change in the template,
// otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25861 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25873 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25885 
// Multiples of 24 channels (48, 120, 192, 264, 336) with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25896 
// For every zero_index in 0..8: multiples of 24 channels (48, 120, 192, 264, 336)
// with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
25910 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25911 
25912 
25913 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to the cr setting (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_eq_24) {
  DWConvMicrokernelTester tester{};
  tester.cr(24).kr(9).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
25921 
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25931 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25942 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25953 
// Every channel count below 24 (1 through 23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_lt_24) {
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25963 
// Every channel count strictly between 24 and 48 (25 through 47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25973 
// Channel counts 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25984 
// Channel counts 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25995 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26006 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
26020 
// width(5) runs with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
// Fix: the loop variable was previously unused (the body hard-coded
// channels(24)), so the identical configuration ran six times. It now feeds
// .channels(); the stride of 127 stays above the largest swept count (116).
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py -- mirror this change in the template,
// otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26032 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26044 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26056 
// Multiples of 24 channels (48, 120, 192, 264, 336) with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26067 
// For every zero_index in 0..8: multiples of 24 channels (48, 120, 192, 264, 336)
// with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
26081 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26082 
26083 
26084 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to the cr setting (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
  DWConvMicrokernelTester tester{};
  tester.cr(24).kr(25).channels(24)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
26092 
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26102 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26113 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26124 
// Every channel count below 24 (1 through 23).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
  for (uint32_t c = 1; c < 24; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26134 
// Every channel count strictly between 24 and 48 (25 through 47).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26144 
// Channel counts 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26155 
// Channel counts 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  for (uint32_t c = 25; c < 48; c++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26166 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26177 
// width(3) runs over channel counts 1, 24, 47, 70, 93, 116, each repeated for
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
26191 
// width(5) runs with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
// Fix: the loop variable was previously unused (the body hard-coded
// channels(24)), so the identical configuration ran six times. It now feeds
// .channels(); the stride of 127 stays above the largest swept count (116).
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py -- mirror this change in the template,
// otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26203 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  // Width-3 runs with qmin(128) for channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26215 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  // Width-3 runs with qmax(128) for channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26227 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
  // Runs with input_offset(464) for channel counts 48, 120, 192, 264 and 336.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26238 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
  // Pairs every zero_index in [0, 25) with channel counts 48, 120, 192, 264 and 336,
  // all at input_offset(464).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26252 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26253 
26254 
26255 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_eq_24) {
  // Single run with the channel count equal to cr (24).
  DWConvMicrokernelTester()
    .cr(24).kr(25)
    .channels(24)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
26263 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24) {
  // Channel counts 48, 120, 192, 264 and 336: all multiples of cr (24).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26273 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmin(128).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26284 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmax(128).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26295 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_lt_24) {
  // Every channel count from 1 through 23, i.e. below cr (24).
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26305 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24) {
  // Every channel count from 25 through 47, i.e. strictly between cr (24) and 2 * cr.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26315 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  // Channel counts 25 through 47 with qmin(128).
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26326 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  // Channel counts 25 through 47 with qmax(128).
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26337 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel) {
  // Width-3 runs for channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26348 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // For channel counts 1, 24, 47, 70, 93 and 116, try every step from 2 through 25 at width 3.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26362 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // Width-5 runs with output_stride 127 for channel counts 1, 24, 47, 70, 93 and 116.
  // The loop variable is now passed to channels(); previously channels(24) was hard-coded,
  // so every iteration repeated the same configuration. All swept counts stay below the
  // output stride of 127.
  // NOTE: this file is auto-generated (tools/generate-dwconv-test.py); mirror the fix there.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26374 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // Width-3 runs with qmin(128) for channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26386 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // Width-3 runs with qmax(128) for channel counts 1, 24, 47, 70, 93 and 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26398 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, input_offset) {
  // Runs with input_offset(464) for channel counts 48, 120, 192, 264 and 336.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester()
      .cr(24).kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
26409 
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, zero) {
  // Pairs every zero_index in [0, 25) with channel counts 48, 120, 192, 264 and 336,
  // all at input_offset(464).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester()
        .cr(24).kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26423 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26424 
26425 
26426 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
  // Single run with the channel count equal to cr (1).
  DWConvMicrokernelTester()
    .cr(1).kr(9)
    .channels(1)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26434 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
  // Every channel count from 2 through 9, i.e. above cr (1).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26444 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
  // Channel counts 2 through 9 with qmin(128).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26455 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
  // Channel counts 2 through 9 with qmax(128).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26466 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
  // Width-3 runs for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26477 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
  // For channel counts 1 through 5, try every step from 2 through 9 at width 3.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester()
        .cr(1).kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26491 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
  // Width-5 runs with output_stride 7 for channel counts 1 through 5.
  // The loop variable is now passed to channels(); previously channels(1) was hard-coded,
  // so every iteration repeated the same configuration. All swept counts stay below the
  // output stride of 7.
  // NOTE: this file is auto-generated (tools/generate-dwconv-test.py); mirror the fix there.
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26503 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
  // Width-3 runs with qmin(128) for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26515 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
  // Width-3 runs with qmax(128) for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26527 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
  // Runs with input_offset(48) for channel counts 2, 5, 8, 11 and 14.
  for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(1).kr(9)
      .channels(num_channels)
      .input_offset(48)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26538 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
  // Pairs every zero_index in [0, 9) with channel counts 2, 5, 8, 11 and 14,
  // all at input_offset(48).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
      DWConvMicrokernelTester()
        .cr(1).kr(9)
        .channels(num_channels)
        .input_offset(48)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26552 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26553 
26554 
26555 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
  // Single run with the channel count equal to cr (1).
  DWConvMicrokernelTester()
    .cr(1).kr(25)
    .channels(1)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26563 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
  // Every channel count from 2 through 9, i.e. above cr (1).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26573 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
  // Channel counts 2 through 9 with qmin(128).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26584 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
  // Channel counts 2 through 9 with qmax(128).
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26595 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
  // Width-3 runs for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26606 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
  // For channel counts 1 through 5, try every step from 2 through 25 at width 3.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(1).kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26620 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
  // Width-5 runs with output_stride 7 for channel counts 1 through 5.
  // The loop variable is now passed to channels(); previously channels(1) was hard-coded,
  // so every iteration repeated the same configuration. All swept counts stay below the
  // output stride of 7.
  // NOTE: this file is auto-generated (tools/generate-dwconv-test.py); mirror the fix there.
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26632 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
  // Width-3 runs with qmin(128) for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26644 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
  // Width-3 runs with qmax(128) for channel counts 1 through 5.
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26656 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
  // Runs with input_offset(48) for channel counts 2, 5, 8, 11 and 14.
  for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(1).kr(25)
      .channels(num_channels)
      .input_offset(48)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26667 
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
  // Pairs every zero_index in [0, 25) with channel counts 2, 5, 8, 11 and 14,
  // all at input_offset(48).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
      DWConvMicrokernelTester()
        .cr(1).kr(25)
        .channels(num_channels)
        .input_offset(48)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26681 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26682 
26683 
26684 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_eq_2) {
  // Single run with the channel count equal to cr (2).
  DWConvMicrokernelTester()
    .cr(2).kr(3)
    .channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26692 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2) {
  // Channel counts 4, 10, 16, 22 and 28: all multiples of cr (2).
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26702 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2_with_qmin) {
  // Channel counts 4, 10, 16, 22 and 28 (multiples of 2) with qmin(128).
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26713 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2_with_qmax) {
  // Channel counts 4, 10, 16, 22 and 28 (multiples of 2) with qmax(128).
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26724 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_lt_2) {
  // Channel counts below cr (2); the range contains only the value 1.
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26734 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2) {
  // Channel counts strictly between cr (2) and 2 * cr; the range contains only the value 3.
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26744 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2_with_qmin) {
  // Channel count 3 (the only value in the range) with qmin(128).
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26755 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2_with_qmax) {
  // Channel count 3 (the only value in the range) with qmax(128).
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26766 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel) {
  // Width-3 runs for channel counts 1 through 10.
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26777 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_step) {
  // For channel counts 1 through 10, try steps 2 and 3 at width 3.
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_value = 2; step_value <= 3; ++step_value) {
      DWConvMicrokernelTester()
        .cr(2).kr(3)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26791 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_output_stride) {
  // Width-5 runs with output_stride 13 for channel counts 1 through 10.
  // The loop variable is now passed to channels(); previously channels(2) was hard-coded,
  // so every iteration repeated the same configuration. All swept counts stay below the
  // output stride of 13.
  // NOTE: this file is auto-generated (tools/generate-dwconv-test.py); mirror the fix there.
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26803 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_qmin) {
  // Width-3 runs with qmin(128) for channel counts 1 through 10.
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26815 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_qmax) {
  // Width-3 runs with qmax(128) for channel counts 1 through 10.
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26827 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, input_offset) {
  // Runs with input_offset(80) for channel counts 4, 10, 16, 22 and 28.
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(3)
      .channels(num_channels)
      .input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26838 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, zero) {
  // Pairs every zero_index in [0, 3) with channel counts 4, 10, 16, 22 and 28,
  // all at input_offset(80).
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester()
        .cr(2).kr(3)
        .channels(num_channels)
        .input_offset(80)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26852 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26853 
26854 
26855 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
  // Single run with the channel count equal to cr (2).
  DWConvMicrokernelTester()
    .cr(2).kr(9)
    .channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26863 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
  // Channel counts 4, 10, 16, 22 and 28: all multiples of cr (2).
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(num_channels)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26873 
// Sweeps channel counts 4, 10, 16, 22, 28 (multiples of cr = 2; kr = 9) with
// the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26884 
// Sweeps channel counts 4, 10, 16, 22, 28 (multiples of cr = 2; kr = 9) with
// the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26895 
// Covers channel counts below cr = 2; the loop visits only channels == 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26905 
// Covers channel counts strictly between cr = 2 and 4; the loop visits only
// channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26915 
// Channel count 3 (the only value in [3, 4)) with cr = 2, kr = 9 and the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26926 
// Channel count 3 (the only value in [3, 4)) with cr = 2, kr = 9 and the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26937 
// Runs every channel count from 1 through 10 with width set to 3
// (cr = 2, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26948 
// For channel counts 1 through 10, runs width = 3 with every step value from
// 2 through kr = 9 (cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
26962 
// Runs channel counts 1 through 10 with width = 5 and output_stride = 13
// (cr = 2, kr = 9). Each iteration passes its own channel count to the tester,
// so the sweep actually varies the configuration; the stride of 13 is larger
// than every channel count visited.
// NOTE: this file is generated (test/qc8-dwconv-minmax-fp32.yaml via
// tools/generate-dwconv-test.py); mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26974 
// Runs channel counts 1 through 10 with width = 3 and the tester's qmin set
// to 128 (cr = 2, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26986 
// Runs channel counts 1 through 10 with width = 3 and the tester's qmax set
// to 128 (cr = 2, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
26998 
// Sweeps channel counts 4, 10, 16, 22, 28 (cr = 2, kr = 9) with the tester's
// input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(9).channels(num_channels).input_offset(80);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27009 
// For every zero_index in [0, kr) with kr = 9, sweeps channel counts
// 4, 10, 16, 22, 28 (cr = 2) with input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(9).channels(num_channels).input_offset(80).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27023 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27024 
27025 
27026 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (2); kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(2).kr(25).channels(2);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
27034 
// Sweeps channel counts 4, 10, 16, 22, 28 (each a multiple of cr = 2); kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27044 
// Sweeps channel counts 4, 10, 16, 22, 28 (multiples of cr = 2; kr = 25) with
// the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27055 
// Sweeps channel counts 4, 10, 16, 22, 28 (multiples of cr = 2; kr = 25) with
// the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27066 
// Covers channel counts below cr = 2; the loop visits only channels == 1
// (kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27076 
// Covers channel counts strictly between cr = 2 and 4; the loop visits only
// channels == 3 (kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27086 
// Channel count 3 (the only value in [3, 4)) with cr = 2, kr = 25 and the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27097 
// Channel count 3 (the only value in [3, 4)) with cr = 2, kr = 25 and the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27108 
// Runs every channel count from 1 through 10 with width set to 3
// (cr = 2, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27119 
// For channel counts 1 through 10, runs width = 3 with every step value from
// 2 through kr = 25 (cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27133 
// Runs channel counts 1 through 10 with width = 5 and output_stride = 13
// (cr = 2, kr = 25). Each iteration passes its own channel count to the
// tester, so the sweep actually varies the configuration; the stride of 13 is
// larger than every channel count visited.
// NOTE: this file is generated (test/qc8-dwconv-minmax-fp32.yaml via
// tools/generate-dwconv-test.py); mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27145 
// Runs channel counts 1 through 10 with width = 3 and the tester's qmin set
// to 128 (cr = 2, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27157 
// Runs channel counts 1 through 10 with width = 3 and the tester's qmax set
// to 128 (cr = 2, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27169 
// Sweeps channel counts 4, 10, 16, 22, 28 (cr = 2, kr = 25) with the tester's
// input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(num_channels).input_offset(80);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27180 
// For every zero_index in [0, kr) with kr = 25, sweeps channel counts
// 4, 10, 16, 22, 28 (cr = 2) with input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(25).channels(num_channels).input_offset(80).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27194 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27195 
27196 
27197 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (4); kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
27205 
// Sweeps channel counts 8, 20, 32, 44, 56 (each a multiple of cr = 4); kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27215 
// Sweeps channel counts 8, 20, 32, 44, 56 (multiples of cr = 4; kr = 9) with
// the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27226 
// Sweeps channel counts 8, 20, 32, 44, 56 (multiples of cr = 4; kr = 9) with
// the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27237 
// Covers every channel count below cr = 4, i.e. 1, 2 and 3 (kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27247 
// Covers channel counts strictly between cr = 4 and 8, i.e. 5, 6 and 7
// (kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27257 
// Channel counts 5, 6 and 7 with cr = 4, kr = 9 and the tester's qmin set
// to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27268 
// Channel counts 5, 6 and 7 with cr = 4, kr = 9 and the tester's qmax set
// to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27279 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width set to 3
// (cr = 4, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27290 
// For channel counts 1, 4, 7, 10, 13, 16, 19, runs width = 3 with every step
// value from 2 through kr = 9 (cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27304 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 5 and
// output_stride = 23 (cr = 4, kr = 9). Each iteration passes its own channel
// count to the tester, so the sweep actually varies the configuration; the
// stride of 23 is larger than every channel count visited.
// NOTE: this file is generated (test/qc8-dwconv-minmax-fp32.yaml via
// tools/generate-dwconv-test.py); mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27316 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 3 and the tester's
// qmin set to 128 (cr = 4, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27328 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 3 and the tester's
// qmax set to 128 (cr = 4, kr = 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27340 
// Sweeps channel counts 8, 20, 32, 44, 56 (cr = 4, kr = 9) with the tester's
// input_offset set to 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(num_channels).input_offset(112);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27351 
// For every zero_index in [0, kr) with kr = 9, sweeps channel counts
// 8, 20, 32, 44, 56 (cr = 4) with input_offset set to 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(num_channels).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27365 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27366 
27367 
27368 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (4); kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
27376 
// Sweeps channel counts 8, 20, 32, 44, 56 (each a multiple of cr = 4); kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27386 
// Sweeps channel counts 8, 20, 32, 44, 56 (multiples of cr = 4; kr = 25) with
// the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27397 
// Sweeps channel counts 8, 20, 32, 44, 56 (multiples of cr = 4; kr = 25) with
// the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27408 
// Covers every channel count below cr = 4, i.e. 1, 2 and 3 (kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27418 
// Covers channel counts strictly between cr = 4 and 8, i.e. 5, 6 and 7
// (kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27428 
// Channel counts 5, 6 and 7 with cr = 4, kr = 25 and the tester's qmin set
// to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27439 
// Channel counts 5, 6 and 7 with cr = 4, kr = 25 and the tester's qmax set
// to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27450 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width set to 3
// (cr = 4, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27461 
// For channel counts 1, 4, 7, 10, 13, 16, 19, runs width = 3 with every step
// value from 2 through kr = 25 (cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27475 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 5 and
// output_stride = 23 (cr = 4, kr = 25). Each iteration passes its own channel
// count to the tester, so the sweep actually varies the configuration; the
// stride of 23 is larger than every channel count visited.
// NOTE: this file is generated (test/qc8-dwconv-minmax-fp32.yaml via
// tools/generate-dwconv-test.py); mirror this change in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27487 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 3 and the tester's
// qmin set to 128 (cr = 4, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27499 
// Runs channel counts 1, 4, 7, 10, 13, 16, 19 with width = 3 and the tester's
// qmax set to 128 (cr = 4, kr = 25).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27511 
// Sweeps channel counts 8, 20, 32, 44, 56 (cr = 4, kr = 25) with the tester's
// input_offset set to 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(num_channels).input_offset(112);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27522 
// For every zero_index in [0, kr) with kr = 25, sweeps channel counts
// 8, 20, 32, 44, 56 (cr = 4) with input_offset set to 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(25).channels(num_channels).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27536 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27537 
27538 
// Single run of the up1x9 scalar_fmagic microkernel with cr=1, kr=9 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(9).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
27546 
// Runs the up1x9 scalar_fmagic microkernel (cr=1, kr=9) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27556 
// Channel counts 2 through 9 on the up1x9 scalar_fmagic microkernel (cr=1, kr=9),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27567 
// Channel counts 2 through 9 on the up1x9 scalar_fmagic microkernel (cr=1, kr=9),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27578 
// Width-3 runs of the up1x9 scalar_fmagic microkernel (cr=1, kr=9) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27589 
// Width-3 runs of the up1x9 scalar_fmagic microkernel (cr=1, kr=9) for every combination
// of channel count 1..5 and step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27603 
// Width-5 runs of the up1x9 scalar_fmagic microkernel (cr=1, kr=9) with output_stride set
// to 7, for channel counts 1 to 5. The loop variable is forwarded to channels() so that each
// iteration covers a distinct channel count rather than repeating the single-channel case;
// the stride of 7 is larger than every channel count in the sweep.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the template
// presumably needs the same change, otherwise regeneration will drop it — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27615 
// Width-3 runs of the up1x9 scalar_fmagic microkernel (cr=1, kr=9) with qmin set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27627 
// Width-3 runs of the up1x9 scalar_fmagic microkernel (cr=1, kr=9) with qmax set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27639 
// Runs the up1x9 scalar_fmagic microkernel (cr=1, kr=9) with input_offset set to 48,
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).input_offset(48);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27650 
// For every zero_index from 0 to 8, runs the up1x9 scalar_fmagic microkernel (cr=1, kr=9)
// with input_offset 48 over channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27664 
// Single run of the up1x9 scalar_imagic microkernel with cr=1, kr=9 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(9).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
27672 
// Runs the up1x9 scalar_imagic microkernel (cr=1, kr=9) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27682 
// Channel counts 2 through 9 on the up1x9 scalar_imagic microkernel (cr=1, kr=9),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27693 
// Channel counts 2 through 9 on the up1x9 scalar_imagic microkernel (cr=1, kr=9),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27704 
// Width-3 runs of the up1x9 scalar_imagic microkernel (cr=1, kr=9) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27715 
// Width-3 runs of the up1x9 scalar_imagic microkernel (cr=1, kr=9) for every combination
// of channel count 1..5 and step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27729 
// Width-5 runs of the up1x9 scalar_imagic microkernel (cr=1, kr=9) with output_stride set
// to 7, for channel counts 1 to 5. The loop variable is forwarded to channels() so that each
// iteration covers a distinct channel count rather than repeating the single-channel case;
// the stride of 7 is larger than every channel count in the sweep.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the template
// presumably needs the same change, otherwise regeneration will drop it — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
27741 
// Width-3 runs of the up1x9 scalar_imagic microkernel (cr=1, kr=9) with qmin set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27753 
// Width-3 runs of the up1x9 scalar_imagic microkernel (cr=1, kr=9) with qmax set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27765 
// Runs the up1x9 scalar_imagic microkernel (cr=1, kr=9) with input_offset set to 48,
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).input_offset(48);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27776 
// For every zero_index from 0 to 8, runs the up1x9 scalar_imagic microkernel (cr=1, kr=9)
// with input_offset 48 over channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27790 
// Single run of the up1x9 scalar_lrintf microkernel with cr=1, kr=9 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(9).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
27798 
// Runs the up1x9 scalar_lrintf microkernel (cr=1, kr=9) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27808 
// Channel counts 2 through 9 on the up1x9 scalar_lrintf microkernel (cr=1, kr=9),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27819 
// Channel counts 2 through 9 on the up1x9 scalar_lrintf microkernel (cr=1, kr=9),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27830 
// Width-3 runs of the up1x9 scalar_lrintf microkernel (cr=1, kr=9) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27841 
// Width-3 runs of the up1x9 scalar_lrintf microkernel (cr=1, kr=9) for every combination
// of channel count 1..5 and step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27855 
// Width-5 runs of the up1x9 scalar_lrintf microkernel (cr=1, kr=9) with output_stride set
// to 7, for channel counts 1 to 5. The loop variable is forwarded to channels() so that each
// iteration covers a distinct channel count rather than repeating the single-channel case;
// the stride of 7 is larger than every channel count in the sweep.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the template
// presumably needs the same change, otherwise regeneration will drop it — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
27867 
// Width-3 runs of the up1x9 scalar_lrintf microkernel (cr=1, kr=9) with qmin set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27879 
// Width-3 runs of the up1x9 scalar_lrintf microkernel (cr=1, kr=9) with qmax set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27891 
// Runs the up1x9 scalar_lrintf microkernel (cr=1, kr=9) with input_offset set to 48,
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9).channels(c).input_offset(48);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
27902 
// For every zero_index from 0 to 8, runs the up1x9 scalar_lrintf microkernel (cr=1, kr=9)
// with input_offset 48 over channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27916 
// Single run of the up1x25 scalar_fmagic microkernel with cr=1, kr=25 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(25).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
27924 
// Runs the up1x25 scalar_fmagic microkernel (cr=1, kr=25) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27934 
// Channel counts 2 through 9 on the up1x25 scalar_fmagic microkernel (cr=1, kr=25),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27945 
// Channel counts 2 through 9 on the up1x25 scalar_fmagic microkernel (cr=1, kr=25),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27956 
// Width-3 runs of the up1x25 scalar_fmagic microkernel (cr=1, kr=25) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
27967 
// Width-3 runs of the up1x25 scalar_fmagic microkernel (cr=1, kr=25) for every combination
// of channel count 1..5 and step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
27981 
// Width-5 runs of the up1x25 scalar_fmagic microkernel (cr=1, kr=25) with output_stride set
// to 7, for channel counts 1 to 5. The loop variable is forwarded to channels() so that each
// iteration covers a distinct channel count rather than repeating the single-channel case;
// the stride of 7 is larger than every channel count in the sweep.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the template
// presumably needs the same change, otherwise regeneration will drop it — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27993 
// Width-3 runs of the up1x25 scalar_fmagic microkernel (cr=1, kr=25) with qmin set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28005 
// Width-3 runs of the up1x25 scalar_fmagic microkernel (cr=1, kr=25) with qmax set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28017 
// Runs the up1x25 scalar_fmagic microkernel (cr=1, kr=25) with input_offset set to 48,
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).input_offset(48);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28028 
// For every zero_index from 0 to 24, runs the up1x25 scalar_fmagic microkernel (cr=1, kr=25)
// with input_offset 48 over channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28042 
// Single run of the up1x25 scalar_imagic microkernel with cr=1, kr=25 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(25).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
28050 
// Runs the up1x25 scalar_imagic microkernel (cr=1, kr=25) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28060 
// Channel counts 2 through 9 on the up1x25 scalar_imagic microkernel (cr=1, kr=25),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28071 
// Channel counts 2 through 9 on the up1x25 scalar_imagic microkernel (cr=1, kr=25),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28082 
// Width-3 runs of the up1x25 scalar_imagic microkernel (cr=1, kr=25) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28093 
// Width-3 runs of the up1x25 scalar_imagic microkernel (cr=1, kr=25) for every combination
// of channel count 1..5 and step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28107 
// Width-5 runs of the up1x25 scalar_imagic microkernel (cr=1, kr=25) with output_stride set
// to 7, for channel counts 1 to 5. The loop variable is forwarded to channels() so that each
// iteration covers a distinct channel count rather than repeating the single-channel case;
// the stride of 7 is larger than every channel count in the sweep.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the template
// presumably needs the same change, otherwise regeneration will drop it — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28119 
// Width-3 runs of the up1x25 scalar_imagic microkernel (cr=1, kr=25) with qmin set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28131 
// Width-3 runs of the up1x25 scalar_imagic microkernel (cr=1, kr=25) with qmax set to 128,
// for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28143 
// Runs the up1x25 scalar_imagic microkernel (cr=1, kr=25) with input_offset set to 48,
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).input_offset(48);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28154 
// For every zero_index from 0 to 24, runs the up1x25 scalar_imagic microkernel (cr=1, kr=25)
// with input_offset 48 over channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28168 
// Single run of the up1x25 scalar_lrintf microkernel with cr=1, kr=25 and one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(25).channels(1);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
28176 
// Runs the up1x25 scalar_lrintf microkernel (cr=1, kr=25) for each channel count from 2 to 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28186 
// Channel counts 2 through 9 on the up1x25 scalar_lrintf microkernel (cr=1, kr=25),
// with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28197 
// Channel counts 2 through 9 on the up1x25 scalar_lrintf microkernel (cr=1, kr=25),
// with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28208 
// Width-3 runs of the up1x25 scalar_lrintf microkernel (cr=1, kr=25) for channel counts 1 to 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28219 
// Width-3 runs of the up1x25 scalar_lrintf microkernel (cr=1, kr=25) for every combination
// of channel count 1..5 and step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28233 
// Runs the up1x25 scalar lrintf qc8 dwconv microkernel with width(5) and
// output_stride(7) for channel counts 1 through 5.
// The loop variable is forwarded to channels(); the body previously
// hard-coded channels(1), so all five iterations ran the same configuration.
// The largest channel count (5) stays below output_stride (7); assumes the
// tester requires output_stride >= channels -- TODO confirm.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
28245 
// Runs the up1x25 scalar lrintf qc8 dwconv microkernel with width(3) and
// qmin(128) for channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28257 
// Runs the up1x25 scalar lrintf qc8 dwconv microkernel with width(3) and
// qmax(128) for channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28269 
// Runs the up1x25 scalar lrintf qc8 dwconv microkernel with input_offset(48)
// for channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28280 
// Runs the up1x25 scalar lrintf qc8 dwconv microkernel with input_offset(48)
// for every zero_index in [0, 25) paired with channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28294 
// Single run of the up2x3 scalar imagic qc8 dwconv microkernel with
// channels(2), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(3).channels(2).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
28302 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel for channel counts
// 4, 10, 16, 22 and 28 (all multiples of the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28312 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with qmin(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28323 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with qmax(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28334 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel for every channel count
// below the cr(2) setting; the loop range [1, 2) yields only channels == 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28344 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel for channel counts in
// [3, 4), i.e. only channels == 3 (one more than the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28354 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with qmin(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28365 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with qmax(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28376 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with width(3) for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28387 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with width(3) for
// every pairing of channel count (1 through 10) and step (2 through 3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t px_step = 2; px_step <= 3; ++px_step) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).step(px_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28401 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with width(5) and
// output_stride(13) for channel counts 1 through 10.
// The loop variable is forwarded to channels(); the body previously
// hard-coded channels(2), so all ten iterations ran the same configuration.
// The largest channel count (10) stays below output_stride (13); assumes the
// tester requires output_stride >= channels -- TODO confirm.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28413 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with width(3) and
// qmin(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28425 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with width(3) and
// qmax(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28437 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with input_offset(80)
// for channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28448 
// Runs the up2x3 scalar imagic qc8 dwconv microkernel with input_offset(80)
// for every zero_index in [0, 3) paired with channel counts 4, 10, 16, 22, 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28462 
// Single run of the up2x3 scalar lrintf qc8 dwconv microkernel with
// channels(2), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(3).channels(2).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
28470 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel for channel counts
// 4, 10, 16, 22 and 28 (all multiples of the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28480 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with qmin(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28491 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with qmax(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28502 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel for every channel count
// below the cr(2) setting; the loop range [1, 2) yields only channels == 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28512 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel for channel counts in
// [3, 4), i.e. only channels == 3 (one more than the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28522 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with qmin(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28533 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with qmax(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28544 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with width(3) for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28555 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with width(3) for
// every pairing of channel count (1 through 10) and step (2 through 3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t px_step = 2; px_step <= 3; ++px_step) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).step(px_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28569 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with width(5) and
// output_stride(13) for channel counts 1 through 10.
// The loop variable is forwarded to channels(); the body previously
// hard-coded channels(2), so all ten iterations ran the same configuration.
// The largest channel count (10) stays below output_stride (13); assumes the
// tester requires output_stride >= channels -- TODO confirm.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
28581 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with width(3) and
// qmin(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28593 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with width(3) and
// qmax(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28605 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with input_offset(80)
// for channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
28616 
// Runs the up2x3 scalar lrintf qc8 dwconv microkernel with input_offset(80)
// for every zero_index in [0, 3) paired with channel counts 4, 10, 16, 22, 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28630 
// Single run of the up2x9 scalar fmagic qc8 dwconv microkernel with
// channels(2), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
28638 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel for channel counts
// 4, 10, 16, 22 and 28 (all multiples of the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28648 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with qmin(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28659 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with qmax(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28670 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel for every channel count
// below the cr(2) setting; the loop range [1, 2) yields only channels == 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28680 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel for channel counts in
// [3, 4), i.e. only channels == 3 (one more than the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28690 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with qmin(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28701 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with qmax(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28712 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with width(3) for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28723 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with width(3) for
// every pairing of channel count (1 through 10) and step (2 through 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t px_step = 2; px_step <= 9; ++px_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(px_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28737 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with width(5) and
// output_stride(13) for channel counts 1 through 10.
// The loop variable is forwarded to channels(); the body previously
// hard-coded channels(2), so all ten iterations ran the same configuration.
// The largest channel count (10) stays below output_stride (13); assumes the
// tester requires output_stride >= channels -- TODO confirm.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
28749 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with width(3) and
// qmin(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28761 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with width(3) and
// qmax(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28773 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with input_offset(80)
// for channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28784 
// Runs the up2x9 scalar fmagic qc8 dwconv microkernel with input_offset(80)
// for every zero_index in [0, 9) paired with channel counts 4, 10, 16, 22, 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zero_idx).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28798 
// Single run of the up2x9 scalar imagic qc8 dwconv microkernel with
// channels(2), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2).Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
28806 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel for channel counts
// 4, 10, 16, 22 and 28 (all multiples of the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28816 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with qmin(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28827 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with qmax(128) for
// channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28838 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel for every channel count
// below the cr(2) setting; the loop range [1, 2) yields only channels == 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28848 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel for channel counts in
// [3, 4), i.e. only channels == 3 (one more than the cr(2) setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28858 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with qmin(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28869 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with qmax(128) for
// channel counts in [3, 4), i.e. only channels == 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28880 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with width(3) for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28891 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with width(3) for
// every pairing of channel count (1 through 10) and step (2 through 9).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t px_step = 2; px_step <= 9; ++px_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(px_step).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
28905 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with width(5) and
// output_stride(13) for channel counts 1 through 10.
// The loop variable is forwarded to channels(); the body previously
// hard-coded channels(2), so all ten iterations ran the same configuration.
// The largest channel count (10) stays below output_stride (13); assumes the
// tester requires output_stride >= channels -- TODO confirm.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28917 
// Runs the up2x9 scalar imagic qc8 dwconv microkernel with width(3) and
// qmin(128) for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
28929 
// up2x9 scalar_imagic ukernel: channel counts 1 through 10 with width(3)
// and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28941 
// up2x9 scalar_imagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28952 
// up2x9 scalar_imagic ukernel: for each zero_index 0..8, channel counts
// 4, 10, 16, 22 and 28 are run with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28966 
// up2x9 scalar_lrintf ukernel: one run with channels equal to the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
28974 
// up2x9 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28
// (all multiples of 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28984 
// up2x9 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28995 
// up2x9 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29006 
// up2x9 scalar_lrintf ukernel: the loop yields a single channel count, 1,
// which is below the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29016 
// up2x9 scalar_lrintf ukernel: the loop yields a single channel count, 3,
// which is above the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29026 
// up2x9 scalar_lrintf ukernel: single channel count 3 (above cr = 2) with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29037 
// up2x9 scalar_lrintf ukernel: single channel count 3 (above cr = 2) with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29048 
// up2x9 scalar_lrintf ukernel: channel counts 1 through 10 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29059 
// up2x9 scalar_lrintf ukernel: for each channel count 1..10, every step
// value from 2 to 9 is tried with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29073 
// up2x9 scalar_lrintf ukernel: channel counts 1 through 10 with width(5) and
// output_stride(13). Each iteration hands its own channel count to the tester
// so the sweep covers distinct configurations; 13 is larger than every
// channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
29085 
// up2x9 scalar_lrintf ukernel: channel counts 1 through 10 with width(3)
// and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29097 
// up2x9 scalar_lrintf ukernel: channel counts 1 through 10 with width(3)
// and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29109 
// up2x9 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29120 
// up2x9 scalar_lrintf ukernel: for each zero_index 0..8, channel counts
// 4, 10, 16, 22 and 28 are run with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29134 
// up2x25 scalar_fmagic ukernel: one run with channels equal to the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
29142 
// up2x25 scalar_fmagic ukernel: channel counts 4, 10, 16, 22 and 28
// (all multiples of 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29152 
// up2x25 scalar_fmagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29163 
// up2x25 scalar_fmagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29174 
// up2x25 scalar_fmagic ukernel: the loop yields a single channel count, 1,
// which is below the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29184 
// up2x25 scalar_fmagic ukernel: the loop yields a single channel count, 3,
// which is above the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29194 
// up2x25 scalar_fmagic ukernel: single channel count 3 (above cr = 2) with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29205 
// up2x25 scalar_fmagic ukernel: single channel count 3 (above cr = 2) with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29216 
// up2x25 scalar_fmagic ukernel: channel counts 1 through 10 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29227 
// up2x25 scalar_fmagic ukernel: for each channel count 1..10, every step
// value from 2 to 25 is tried with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29241 
// up2x25 scalar_fmagic ukernel: channel counts 1 through 10 with width(5) and
// output_stride(13). Each iteration hands its own channel count to the tester
// so the sweep covers distinct configurations; 13 is larger than every
// channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
29253 
// up2x25 scalar_fmagic ukernel: channel counts 1 through 10 with width(3)
// and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29265 
// up2x25 scalar_fmagic ukernel: channel counts 1 through 10 with width(3)
// and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29277 
// up2x25 scalar_fmagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29288 
// up2x25 scalar_fmagic ukernel: for each zero_index 0..24, channel counts
// 4, 10, 16, 22 and 28 are run with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29302 
// up2x25 scalar_imagic ukernel: one run with channels equal to the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
29310 
// up2x25 scalar_imagic ukernel: channel counts 4, 10, 16, 22 and 28
// (all multiples of 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29320 
// up2x25 scalar_imagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29331 
// up2x25 scalar_imagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29342 
// up2x25 scalar_imagic ukernel: the loop yields a single channel count, 1,
// which is below the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29352 
// up2x25 scalar_imagic ukernel: the loop yields a single channel count, 3,
// which is above the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29362 
// up2x25 scalar_imagic ukernel: single channel count 3 (above cr = 2) with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29373 
// up2x25 scalar_imagic ukernel: single channel count 3 (above cr = 2) with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29384 
// up2x25 scalar_imagic ukernel: channel counts 1 through 10 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29395 
// up2x25 scalar_imagic ukernel: for each channel count 1..10, every step
// value from 2 to 25 is tried with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29409 
// up2x25 scalar_imagic ukernel: channel counts 1 through 10 with width(5) and
// output_stride(13). Each iteration hands its own channel count to the tester
// so the sweep covers distinct configurations; 13 is larger than every
// channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
29421 
// up2x25 scalar_imagic ukernel: channel counts 1 through 10 with width(3)
// and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29433 
// up2x25 scalar_imagic ukernel: channel counts 1 through 10 with width(3)
// and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29445 
// up2x25 scalar_imagic ukernel: channel counts 4, 10, 16, 22 and 28 with
// input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29456 
// up2x25 scalar_imagic ukernel: for each zero_index 0..24, channel counts
// 4, 10, 16, 22 and 28 are run with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29470 
// up2x25 scalar_lrintf ukernel: one run with channels equal to the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
29478 
// up2x25 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28
// (all multiples of 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29488 
// up2x25 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29499 
// up2x25 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29510 
// up2x25 scalar_lrintf ukernel: the loop yields a single channel count, 1,
// which is below the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29520 
// up2x25 scalar_lrintf ukernel: the loop yields a single channel count, 3,
// which is above the cr value (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29530 
// up2x25 scalar_lrintf ukernel: single channel count 3 (above cr = 2) with
// qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29541 
// up2x25 scalar_lrintf ukernel: single channel count 3 (above cr = 2) with
// qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29552 
// up2x25 scalar_lrintf ukernel: channel counts 1 through 10 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29563 
// up2x25 scalar_lrintf ukernel: for each channel count 1..10, every step
// value from 2 to 25 is tried with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29577 
// up2x25 scalar_lrintf ukernel: channel counts 1 through 10 with width(5) and
// output_stride(13). Each iteration hands its own channel count to the tester
// so the sweep covers distinct configurations; 13 is larger than every
// channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
29589 
// up2x25 scalar_lrintf ukernel: channel counts 1 through 10 with width(3)
// and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29601 
// up2x25 scalar_lrintf ukernel: channel counts 1 through 10 with width(3)
// and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29613 
// up2x25 scalar_lrintf ukernel: channel counts 4, 10, 16, 22 and 28 with
// input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29624 
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
  // For each zero_index in 0..24, sweep channel counts 4, 10, 16, 22, 28
  // with input_offset set to 80.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(25).channels(c).input_offset(80).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29638 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
  // Single configuration: channels(4), equal to the cr(4) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
29646 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- each a multiple of 4.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29656 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmin set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29667 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmax set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29678 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below the cr(4) setting.
  for (uint32_t c = 1; c < 4; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29688 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
  // Channel counts 5, 6, 7 -- above the cr(4) setting and below 8.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29698 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29709 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29720 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 with width set to 3.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29731 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29745 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  // Sweep channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
  // The loop variable is forwarded to channels(); a hard-coded channels(4)
  // would make every iteration re-run the identical configuration.
  // output_stride (23) stays above the largest channel count swept (19).
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
29757 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmin set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29769 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmax set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29781 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with input_offset set to 112.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29792 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
  // For each zero_index in 0..8, sweep channel counts 8, 20, 32, 44, 56
  // with input_offset set to 112.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29806 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
  // Single configuration: channels(4), equal to the cr(4) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
29814 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- each a multiple of 4.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29824 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmin set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29835 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmax set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29846 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below the cr(4) setting.
  for (uint32_t c = 1; c < 4; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29856 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
  // Channel counts 5, 6, 7 -- above the cr(4) setting and below 8.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29866 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29877 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29888 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 with width set to 3.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29899 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29913 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  // Sweep channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
  // The loop variable is forwarded to channels(); a hard-coded channels(4)
  // would make every iteration re-run the identical configuration.
  // output_stride (23) stays above the largest channel count swept (19).
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
29925 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmin set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29937 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmax set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29949 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with input_offset set to 112.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29960 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
  // For each zero_index in 0..8, sweep channel counts 8, 20, 32, 44, 56
  // with input_offset set to 112.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29974 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
  // Single configuration: channels(4), equal to the cr(4) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
29982 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- each a multiple of 4.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29992 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmin set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30003 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmax set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30014 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below the cr(4) setting.
  for (uint32_t c = 1; c < 4; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30024 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
  // Channel counts 5, 6, 7 -- above the cr(4) setting and below 8.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30034 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30045 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30056 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 with width set to 3.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30067 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30081 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  // Sweep channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
  // The loop variable is forwarded to channels(); a hard-coded channels(4)
  // would make every iteration re-run the identical configuration.
  // output_stride (23) stays above the largest channel count swept (19).
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
30093 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmin set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30105 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmax set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30117 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with input_offset set to 112.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30128 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
  // For each zero_index in 0..8, sweep channel counts 8, 20, 32, 44, 56
  // with input_offset set to 112.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30142 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
  // Single configuration: channels(4), equal to the cr(4) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(25).channels(4);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
30150 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- each a multiple of 4.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30160 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmin set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30171 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4) with qmax set to 128.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30182 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below the cr(4) setting.
  for (uint32_t c = 1; c < 4; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30192 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
  // Channel counts 5, 6, 7 -- above the cr(4) setting and below 8.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30202 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30213 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t c = 5; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30224 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 with width set to 3.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30235 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with step values 2..25, width(3).
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30249 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  // Sweep channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
  // The loop variable is forwarded to channels(); a hard-coded channels(4)
  // would make every iteration re-run the identical configuration.
  // output_stride (23) stays above the largest channel count swept (19).
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
30261 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmin set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30273 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19, width(3), with qmax set to 128.
  for (size_t c = 1; c <= 20; c += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30285 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with input_offset set to 112.
  for (uint32_t c = 8; c < 64; c += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(c).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30296 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
  // For each zero_index in 0..24, sweep channel counts 8, 20, 32, 44, 56
  // with input_offset set to 112.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(25).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30310 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
  // Single run with the channel count equal to cr (4).
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
30318 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- all multiples of 4.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30328 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4), with qmin set to 128.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30339 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4), with qmax set to 128.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30350 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below cr (4).
  for (uint32_t num_channels = 1; num_channels < 4; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30360 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
  // Channel counts 5, 6, 7 -- strictly between cr (4) and 2 * cr.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30370 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30381 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30392 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 at width 3.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30403 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with every step in [2, 25],
  // all at width 3.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30417 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  // Channel counts 1, 4, 7, ..., 19 at width 5 with an output stride of 23.
  // The stride of 23 is larger than every channel count in the sweep.
  //
  // The loop variable is passed to channels(); previously a constant 4 was
  // passed, so the loop repeated one identical configuration seven times and
  // the channel sweep was never exercised.
  // NOTE(review): this file is generated; the same change presumably belongs
  // in the generator template as well -- confirm.
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
30429 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30441 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30453 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with an input offset of 112.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).input_offset(112);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
30464 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
  // Every zero_index in [0, 25) crossed with channel counts 8, 20, ..., 56,
  // all with an input offset of 112.
  for (uint32_t zero_pos = 0; zero_pos < 25; zero_pos++) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(num_channels).input_offset(112).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30478 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
  // Single run with the channel count equal to cr (4).
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
30486 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
  // Channel counts 8, 20, 32, 44, 56 -- all multiples of 4.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30496 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4), with qmin set to 128.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30507 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
  // Channel counts 8, 20, 32, 44, 56 (multiples of 4), with qmax set to 128.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30518 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
  // Channel counts 1, 2, 3 -- all below cr (4).
  for (uint32_t num_channels = 1; num_channels < 4; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30528 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
  // Channel counts 5, 6, 7 -- strictly between cr (4) and 2 * cr.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30538 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
  // Channel counts 5, 6, 7 with qmin set to 128.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30549 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
  // Channel counts 5, 6, 7 with qmax set to 128.
  for (uint32_t num_channels = 5; num_channels < 8; num_channels++) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30560 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
  // Channel counts 1, 4, 7, ..., 19 at width 3.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30571 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
  // Channel counts 1, 4, 7, ..., 19 crossed with every step in [2, 25],
  // all at width 3.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30585 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  // Channel counts 1, 4, 7, ..., 19 at width 5 with an output stride of 23.
  // The stride of 23 is larger than every channel count in the sweep.
  //
  // The loop variable is passed to channels(); previously a constant 4 was
  // passed, so the loop repeated one identical configuration seven times and
  // the channel sweep was never exercised.
  // NOTE(review): this file is generated; the same change presumably belongs
  // in the generator template as well -- confirm.
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
30597 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
  // Channel counts 1, 4, 7, ..., 19 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30609 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
  // Channel counts 1, 4, 7, ..., 19 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30621 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
  // Channel counts 8, 20, 32, 44, 56 with an input offset of 112.
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).input_offset(112);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
30632 
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
  // Every zero_index in [0, 25) crossed with channel counts 8, 20, ..., 56,
  // all with an input offset of 112.
  for (uint32_t zero_pos = 0; zero_pos < 25; zero_pos++) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(num_channels).input_offset(112).zero_index(zero_pos);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}