xref: /aosp_15_r20/external/XNNPACK/test/qs8-dwconv-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/qs8-dwconv-minmax-fp32.yaml
11 //   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: channels(8) paired with cr(8).
  auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
32 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 - each a multiple of 8.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
43 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with qmin(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
55 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with qmax(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
67 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
78 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
89 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15 with qmin(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
101 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15 with qmax(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
113 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
125 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 crossed with every step from 2 through 9, width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester =
          DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
140 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
  // The loop variable feeds channels() so that each iteration exercises a distinct
  // configuration; the largest swept count (36) stays below the stride of 43.
  // NOTE: the file header marks this file as generated by tools/generate-dwconv-test.py;
  // the same change should be mirrored in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
153 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
166 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
179 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
191 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Every zero_index from 0 through 8 crossed with channel counts 16, 40, 64, 88, 112,
  // all with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      auto tester = DWConvMicrokernelTester()
                        .cr(8)
                        .kr(9)
                        .channels(num_channels)
                        .input_offset(176)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
206 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
207 
208 
209 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single configuration: channels(8) paired with cr(8).
  auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
218 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 - each a multiple of 8.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
229 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 with qmin(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
241 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 with qmax(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
253 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
264 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
275 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15 with qmin(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
287 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15 with qmax(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
299 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
311 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 crossed with every step from 2 through 9, width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester =
          DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
326 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
  // The loop variable feeds channels() so that each iteration exercises a distinct
  // configuration; the largest swept count (36) stays below the stride of 43.
  // NOTE: the file header marks this file as generated by tools/generate-dwconv-test.py;
  // the same change should be mirrored in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
339 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
352 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
365 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
377 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every zero_index from 0 through 8 crossed with channel counts 16, 40, 64, 88, 112,
  // all with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      auto tester = DWConvMicrokernelTester()
                        .cr(8)
                        .kr(9)
                        .channels(num_channels)
                        .input_offset(176)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
392 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
393 
394 
395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: channels(8) paired with cr(8).
  auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
404 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 - each a multiple of 8.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
415 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with qmin(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
427 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with qmax(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
439 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
450 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
461 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15 with qmin(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
473 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 9 through 15 with qmax(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
485 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
497 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 crossed with every step from 2 through 25, width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      auto tester =
          DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
512 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
  // The loop variable feeds channels() so that each iteration exercises a distinct
  // configuration; the largest swept count (36) stays below the stride of 43.
  // NOTE: the file header marks this file as generated by tools/generate-dwconv-test.py;
  // the same change should be mirrored in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
525 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
538 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
551 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
563 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Every zero_index from 0 through 24 crossed with channel counts 16, 40, 64, 88, 112,
  // all with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      auto tester = DWConvMicrokernelTester()
                        .cr(8)
                        .kr(25)
                        .channels(num_channels)
                        .input_offset(176)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
578 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
579 
580 
581 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single configuration: channels(8) paired with cr(8).
  auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
590 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 - each a multiple of 8.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
601 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 with qmin(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
613 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 16, 40, 64, 88, 112 with qmax(128) applied.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
625 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
636 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
647 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15 with qmin(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
659 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 9 through 15 with qmax(128) applied.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
671 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
683 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 crossed with every step from 2 through 25, width(3).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      auto tester =
          DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
698 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
  // The loop variable feeds channels() so that each iteration exercises a distinct
  // configuration; the largest swept count (36) stays below the stride of 43.
  // NOTE: the file header marks this file as generated by tools/generate-dwconv-test.py;
  // the same change should be mirrored in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
711 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
724 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    auto tester =
        DWConvMicrokernelTester().cr(8).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
737 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
  // Channel counts 16, 40, 64, 88, 112, each run with input_offset(176).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
749 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
  // For every zero_index in 0..24, runs channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
764 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
765 
766 
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
  // Single run with channels(16), the same value that is passed to cr().
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
776 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 - each a multiple of 16.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
787 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
799 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
811 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
  // Every channel count from 1 through 15, i.e. all values below 16.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
822 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
  // Every channel count from 17 through 31.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
833 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, each with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
845 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, each with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
857 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
869 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 9,
  // always with width(3).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
884 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  // Sweeps channels over 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The loop variable is forwarded to channels(); the earlier body hard-coded
  // channels(16), so every iteration repeated one configuration and the sweep
  // had no effect. 83 is larger than the biggest swept channel count (76).
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
897 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
910 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
923 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
935 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
  // For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
950 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
951 
952 
953 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
  // Single run with channels(16), the same value that is passed to cr().
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
    xnn_init_qs8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
962 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 - each a multiple of 16.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
973 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
985 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
997 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
  // Every channel count from 1 through 15, i.e. all values below 16.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1008 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
  // Every channel count from 17 through 31.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1019 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, each with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1031 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, each with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1043 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1055 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 9,
  // always with width(3).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1070 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
  // Sweeps channels over 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The loop variable is forwarded to channels(); the earlier body hard-coded
  // channels(16), so every iteration repeated one configuration and the sweep
  // had no effect. 83 is larger than the biggest swept channel count (76).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1083 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1096 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1109 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1121 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
  // For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1136 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137 
1138 
1139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
  // Single run with channels(16), the same value that is passed to cr().
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
    xnn_init_qs8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
1148 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 - each a multiple of 16.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1159 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1171 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1183 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
  // Every channel count from 1 through 15, i.e. all values below 16.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1194 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
  // Every channel count from 17 through 31.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1205 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, each with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1217 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, each with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1229 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1241 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 25,
  // always with width(3).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1256 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  // Sweeps channels over 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The loop variable is forwarded to channels(); the earlier body hard-coded
  // channels(16), so every iteration repeated one configuration and the sweep
  // had no effect. 83 is larger than the biggest swept channel count (76).
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1269 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmin(128).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1282 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76; each run uses width(3) and qmax(128).
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1295 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
1307 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
  // For every zero_index in 0..24, runs channel counts 32, 80, 128, 176, 224
  // with input_offset(304).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1322 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323 
1324 
1325 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
  // Single run with channels(16), the same value that is passed to cr().
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
    xnn_init_qs8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
1334 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 - each a multiple of 16.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1345 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1357 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224 (multiples of 16), each with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1369 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
  // Every channel count from 1 through 15, i.e. all values below 16.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1380 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
  // Every channel count from 17 through 31.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1391 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, each with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1403 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, each with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1415 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
1427 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 crossed with step values 2 through 25,
  // always with width(3).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1442 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
  // Sweeps channels over 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The loop variable is forwarded to channels(); the earlier body hard-coded
  // channels(16), so every iteration repeated one configuration and the sweep
  // had no effect. 83 is larger than the biggest swept channel count (76).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1455 
// Runs with width 3 and qmin set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(channel_count).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1468 
// Runs with width 3 and qmax set to 128 for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(channel_count).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1481 
// Sweeps channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(channel_count).input_offset(304)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1493 
// For every zero_index from 0 through 24, sweeps channel counts 32, 80, 128,
// 176 and 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(channel_count).input_offset(304).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1508 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509 
1510 
1511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1520 
// Sweeps channel counts 48, 120, 192, 264 and 336, each a multiple of the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1531 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1543 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1555 
// Covers every channel count from 1 through 23, i.e. below the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 1; channel_count < 24; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1566 
// Covers every channel count from 25 through 47, i.e. above the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1577 
// Covers every channel count from 25 through 47 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1589 
// Covers every channel count from 25 through 47 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1601 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1613 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116, trying every
// step value from 2 through 9 for each channel count.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1628 
// Runs with width 5 and an output stride of 127 for channel counts 1, 24, 47,
// 70, 93 and 116.
// The loop variable is forwarded to channels() so that each iteration covers a
// different channel count; passing the constant 24 would leave the loop
// variable unused and repeat one configuration six times. Every swept count
// stays below the output stride of 127.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1641 
// Runs with width 3 and qmin set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1654 
// Runs with width 3 and qmax set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1667 
// Sweeps channel counts 48, 120, 192, 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).input_offset(464)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1679 
// For every zero_index from 0 through 8, sweeps channel counts 48, 120, 192,
// 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).input_offset(464).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1694 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1695 
1696 
1697 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1706 
// Sweeps channel counts 48, 120, 192, 264 and 336, each a multiple of the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1717 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1729 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1741 
// Covers every channel count from 1 through 23, i.e. below the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 1; channel_count < 24; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1752 
// Covers every channel count from 25 through 47, i.e. above the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1763 
// Covers every channel count from 25 through 47 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1775 
// Covers every channel count from 25 through 47 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1787 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1799 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116, trying every
// step value from 2 through 9 for each channel count.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1814 
// Runs with width 5 and an output stride of 127 for channel counts 1, 24, 47,
// 70, 93 and 116.
// The loop variable is forwarded to channels() so that each iteration covers a
// different channel count; passing the constant 24 would leave the loop
// variable unused and repeat one configuration six times. Every swept count
// stays below the output stride of 127.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1827 
// Runs with width 3 and qmin set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1840 
// Runs with width 3 and qmax set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1853 
// Sweeps channel counts 48, 120, 192, 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).input_offset(464)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1865 
// For every zero_index from 0 through 8, sweeps channel counts 48, 120, 192,
// 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(channel_count).input_offset(464).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1880 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1881 
1882 
1883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1892 
// Sweeps channel counts 48, 120, 192, 264 and 336, each a multiple of the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1903 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1915 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1927 
// Covers every channel count from 1 through 23, i.e. below the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 1; channel_count < 24; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1938 
// Covers every channel count from 25 through 47, i.e. above the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1949 
// Covers every channel count from 25 through 47 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1961 
// Covers every channel count from 25 through 47 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1973 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1985 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116, trying every
// step value from 2 through 25 for each channel count.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).width(3).step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2000 
// Runs with width 5 and an output stride of 127 for channel counts 1, 24, 47,
// 70, 93 and 116.
// The loop variable is forwarded to channels() so that each iteration covers a
// different channel count; passing the constant 24 would leave the loop
// variable unused and repeat one configuration six times. Every swept count
// stays below the output stride of 127.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2013 
// Runs with width 3 and qmin set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2026 
// Runs with width 3 and qmax set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channel_count = 1; channel_count <= 120; channel_count += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2039 
// Sweeps channel counts 48, 120, 192, 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).input_offset(464)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2051 
// For every zero_index from 0 through 24, sweeps channel counts 48, 120, 192,
// 264 and 336 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).input_offset(464).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2066 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2067 
2068 
2069 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr value (24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2078 
// Sweeps channel counts 48, 120, 192, 264 and 336, each a multiple of the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2089 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2101 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 48; channel_count < 384; channel_count += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2113 
// Covers every channel count from 1 through 23, i.e. below the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 1; channel_count < 24; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2124 
// Covers every channel count from 25 through 47, i.e. above the cr value 24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2135 
// Covers every channel count from 25 through 47 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2147 
// Covers every channel count from 25 through 47 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t channel_count = 25; channel_count < 48; ++channel_count) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(channel_count).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2159 
// Width-3 runs of the up24x25 NEONv8 MUL16 kernel for channel counts
// 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester =
        DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2171 
// Width-3 runs for channel counts 1, 24, 47, 70, 93 and 116, each repeated
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester()
                        .cr(24)
                        .kr(25)
                        .channels(c)
                        .width(3)
                        .step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2186 
// Width-5 runs of the up24x25 NEONv8 MUL16 kernel with output_stride = 127,
// for channel counts 1, 24, 47, 70, 93 and 116.
//
// The loop variable is forwarded to channels(); passing a constant here would
// make all six iterations run the same configuration. The output stride (127)
// is larger than the biggest channel count iterated (116).
// NOTE: this file is auto-generated (tools/generate-dwconv-test.py); the same
// change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2199 
// Width-3 runs for channel counts 1, 24, 47, 70, 93 and 116 with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester()
                      .cr(24)
                      .kr(25)
                      .channels(c)
                      .width(3)
                      .qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2212 
// Width-3 runs for channel counts 1, 24, 47, 70, 93 and 116 with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester()
                      .cr(24)
                      .kr(25)
                      .channels(c)
                      .width(3)
                      .qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2225 
// Runs with input_offset = 464 for channel counts 48, 120, 192, 264 and 336
// (all multiples of cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester =
        DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2237 
// For every zero_index from 0 through 24 (one per kr = 25 position), runs with
// input_offset = 464 over channel counts 48, 120, 192, 264 and 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester = DWConvMicrokernelTester()
                        .cr(24)
                        .kr(25)
                        .channels(c)
                        .input_offset(464)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2252 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2253 
2254 
2255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up32x9 NEON MUL16 kernel with channels equal to cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2264 
// Runs the up32x9 NEON MUL16 kernel for channel counts 64, 160, 256, 352 and
// 448 (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2275 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2287 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2299 
// Runs the up32x9 NEON MUL16 kernel for every channel count from 1 to 31,
// i.e. all counts below cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2310 
// Runs the up32x9 NEON MUL16 kernel for every channel count from 33 to 63,
// i.e. strictly between cr = 32 and 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2321 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2333 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2345 
// Width-3 runs of the up32x9 NEON MUL16 kernel for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2357 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156, each repeated
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(9)
                        .channels(c)
                        .width(3)
                        .step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2372 
// Width-5 runs of the up32x9 NEON MUL16 kernel with output_stride = 163,
// for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is forwarded to channels(); passing a constant here would
// make all six iterations run the same configuration. The output stride (163)
// is larger than the biggest channel count iterated (156).
// NOTE: this file is auto-generated (tools/generate-dwconv-test.py); the same
// change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2385 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(9)
                      .channels(c)
                      .width(3)
                      .qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2398 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(9)
                      .channels(c)
                      .width(3)
                      .qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2411 
// Runs with input_offset = 592 for channel counts 64, 160, 256, 352 and 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2423 
// For every zero_index from 0 through 8 (one per kr = 9 position), runs with
// input_offset = 592 over channel counts 64, 160, 256, 352 and 448.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(9)
                        .channels(c)
                        .input_offset(592)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2438 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2439 
2440 
2441 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up32x9 NEONv8 MUL16 kernel with channels equal to cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2450 
// Runs the up32x9 NEONv8 MUL16 kernel for channel counts 64, 160, 256, 352 and
// 448 (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2461 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2473 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2485 
// Runs the up32x9 NEONv8 MUL16 kernel for every channel count from 1 to 31,
// i.e. all counts below cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2496 
// Runs the up32x9 NEONv8 MUL16 kernel for every channel count from 33 to 63,
// i.e. strictly between cr = 32 and 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2507 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2519 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2531 
// Width-3 runs of the up32x9 NEONv8 MUL16 kernel for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2543 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156, each repeated
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(9)
                        .channels(c)
                        .width(3)
                        .step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2558 
// Width-5 runs of the up32x9 NEONv8 MUL16 kernel with output_stride = 163,
// for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is forwarded to channels(); passing a constant here would
// make all six iterations run the same configuration. The output stride (163)
// is larger than the biggest channel count iterated (156).
// NOTE: this file is auto-generated (tools/generate-dwconv-test.py); the same
// change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2571 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(9)
                      .channels(c)
                      .width(3)
                      .qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2584 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(9)
                      .channels(c)
                      .width(3)
                      .qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2597 
// Runs with input_offset = 592 for channel counts 64, 160, 256, 352 and 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2609 
// For every zero_index from 0 through 8 (one per kr = 9 position), runs with
// input_offset = 592 over channel counts 64, 160, 256, 352 and 448.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(9)
                        .channels(c)
                        .input_offset(592)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2624 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2625 
2626 
2627 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up32x25 NEON MUL16 kernel with channels equal to cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2636 
// Runs the up32x25 NEON MUL16 kernel for channel counts 64, 160, 256, 352 and
// 448 (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2647 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2659 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2671 
// Runs the up32x25 NEON MUL16 kernel for every channel count from 1 to 31,
// i.e. all counts below cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2682 
// Runs the up32x25 NEON MUL16 kernel for every channel count from 33 to 63,
// i.e. strictly between cr = 32 and 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2693 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2705 
// Same channel sweep as c_gt_32 (33..63), with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2717 
// Width-3 runs of the up32x25 NEON MUL16 kernel for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2729 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156, each repeated
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(25)
                        .channels(c)
                        .width(3)
                        .step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2744 
// Width-5 runs of the up32x25 NEON MUL16 kernel with output_stride = 163,
// for channel counts 1, 32, 63, 94, 125 and 156.
//
// The loop variable is forwarded to channels(); passing a constant here would
// make all six iterations run the same configuration. The output stride (163)
// is larger than the biggest channel count iterated (156).
// NOTE: this file is auto-generated (tools/generate-dwconv-test.py); the same
// change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2757 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(25)
                      .channels(c)
                      .width(3)
                      .qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2770 
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156 with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester()
                      .cr(32)
                      .kr(25)
                      .channels(c)
                      .width(3)
                      .qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2783 
// Runs with input_offset = 592 for channel counts 64, 160, 256, 352 and 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2795 
// For every zero_index from 0 through 24 (one per kr = 25 position), runs with
// input_offset = 592 over channel counts 64, 160, 256, 352 and 448.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester()
                        .cr(32)
                        .kr(25)
                        .channels(c)
                        .input_offset(592)
                        .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2810 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2811 
2812 
2813 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the up32x25 NEONv8 MUL16 kernel with channels equal to cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2822 
// Runs the up32x25 NEONv8 MUL16 kernel for channel counts 64, 160, 256, 352
// and 448 (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2833 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmin = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2845 
// Same channel sweep as c_div_32 (64, 160, 256, 352, 448) with qmax = 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester =
        DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2857 
// Runs the up32x25 NEONv8 MUL16 kernel for every channel count from 1 to 31,
// i.e. all counts below cr = 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2868 
// Every channel count from 33 through 63: more than one 32-channel tile but
// fewer than two.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2879 
// Channel counts 33 through 63 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2891 
// Channel counts 33 through 63 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2903 
// Width of 3 with channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2915 
// Width of 3; for each channel count 1, 32, 63, 94, 125, 156 the tester's
// step is swept over 2 through 25 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2930 
// Width of 5 with an output stride of 163, swept over channel counts
// 1, 32, 63, 94, 125, 156 (all below the stride).
//
// Fix: the loop variable used to be ignored (`.channels(32)` was passed on
// every iteration), so the same configuration simply ran six times. The
// channel count now follows the loop.
// NOTE: this file is generated (see the header: tools/generate-dwconv-test.py);
// the same change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)  // previously hard-coded to 32
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2943 
// Width of 3, channel counts 1, 32, 63, 94, 125, 156, tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2956 
// Width of 3, channel counts 1, 32, 63, 94, 125, 156, tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2969 
// Channel counts 64, 160, ..., 448 (multiples of 32) with the tester's
// input_offset set to 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2981 
// For each zero_index 0 through 24 (one per kernel tap, kr = 25), runs channel
// counts 64, 160, ..., 448 with input_offset 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t z = 0; z < 25; z++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(z).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2996 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2997 
2998 
2999 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 8 channels, equal to the channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
3008 
// Channel counts 16, 40, 64, 88, 112: every one a whole multiple of the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3019 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmin
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3031 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmax
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3043 
// Every channel count from 1 through 7, i.e. fewer channels than the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3054 
// Every channel count from 9 through 15: more than one 8-channel tile but
// fewer than two.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3065 
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3077 
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3089 
// Width of 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3101 
// Width of 3; for each channel count 1, 8, 15, 22, 29, 36 the tester's step
// is swept over 2 through 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3116 
// Width of 5 with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36 (all below the stride).
//
// Fix: the loop variable used to be ignored (`.channels(8)` was passed on
// every iteration), so the same configuration simply ran six times. The
// channel count now follows the loop.
// NOTE: this file is generated (see the header: tools/generate-dwconv-test.py);
// the same change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)  // previously hard-coded to 8
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
3129 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3142 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3155 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's
// input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3167 
// For each zero_index 0 through 8 (one per kernel tap, kr = 9), runs channel
// counts 16, 40, ..., 112 with input_offset 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t z = 0; z < 9; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3183 
3184 
3185 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 8 channels, equal to the channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
3194 
// Channel counts 16, 40, 64, 88, 112: every one a whole multiple of the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3205 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmin
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3217 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmax
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3229 
// Every channel count from 1 through 7, i.e. fewer channels than the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3240 
// Every channel count from 9 through 15: more than one 8-channel tile but
// fewer than two.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3251 
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3263 
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3275 
// Width of 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3287 
// Width of 3; for each channel count 1, 8, 15, 22, 29, 36 the tester's step
// is swept over 2 through 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3302 
// Width of 5 with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36 (all below the stride).
//
// Fix: the loop variable used to be ignored (`.channels(8)` was passed on
// every iteration), so the same configuration simply ran six times. The
// channel count now follows the loop.
// NOTE: this file is generated (see the header: tools/generate-dwconv-test.py);
// the same change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)  // previously hard-coded to 8
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
3315 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3328 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3341 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's
// input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
3353 
// For each zero_index 0 through 8 (one per kernel tap, kr = 9), runs channel
// counts 16, 40, ..., 112 with input_offset 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t z = 0; z < 9; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3368 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3369 
3370 
3371 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 8 channels, equal to the channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
3380 
// Channel counts 16, 40, 64, 88, 112: every one a whole multiple of the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3391 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmin
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3403 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's qmax
// set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3415 
// Every channel count from 1 through 7, i.e. fewer channels than the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3426 
// Every channel count from 9 through 15: more than one 8-channel tile but
// fewer than two.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3437 
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3449 
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3461 
// Width of 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3473 
// Width of 3; for each channel count 1, 8, 15, 22, 29, 36 the tester's step
// is swept over 2 through 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3488 
// Width of 5 with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36 (all below the stride).
//
// Fix: the loop variable used to be ignored (`.channels(8)` was passed on
// every iteration), so the same configuration simply ran six times. The
// channel count now follows the loop.
// NOTE: this file is generated (see the header: tools/generate-dwconv-test.py);
// the same change should be mirrored in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)  // previously hard-coded to 8
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
3501 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3514 
// Width of 3, channel counts 1, 8, 15, 22, 29, 36, tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3527 
// Channel counts 16, 40, ..., 112 (multiples of 8) with the tester's
// input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3539 
// For each zero_index 0 through 8 (one per kernel tap, kr = 9), runs channel
// counts 16, 40, ..., 112 with input_offset 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t z = 0; z < 9; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3554 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3555 
3556 
3557 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 8 channels, equal to the channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
3566 
// Channel counts 16, 40, 64, 88, 112: every one a whole multiple of the
// 8-channel tile (cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
3577 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmin set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3589 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmax set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3601 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 1 through 7 (strictly below 8).
  uint32_t num_channels = 1;
  while (num_channels < 8) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3612 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15 (strictly between 8 and 16).
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3623 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15, each run with qmin set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3635 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15, each run with qmax set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3647 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3659 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36; for each, every step value from
  // 2 through 9 is run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    size_t step_value = 2;
    while (step_value <= 9) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      step_value++;
    }
    num_channels += 7;
  }
}
3674 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Runs a width of 5 with an output stride of 43 for channel counts
  // 1, 8, 15, 22, 29 and 36. The loop variable is forwarded to channels() so
  // that each count is actually exercised (a fixed channels(8) would repeat
  // one configuration six times). The largest count, 36, is below the
  // output stride of 43.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
3687 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmin set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3700 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmax set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3713 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112, each run with an input offset of 176.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .input_offset(176)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3725 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;

  // For every zero_index value 0..8, run channel counts 16, 40, 64, 88 and 112
  // with an input offset of 176.
  uint32_t zero_idx = 0;
  while (zero_idx < 9) {
    uint32_t num_channels = 16;
    while (num_channels < 128) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .input_offset(176)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      num_channels += 24;
    }
    zero_idx++;
  }
}
3740 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3741 
3742 
3743 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;

  // Single run: the channel count equals the cr setting (8).
  constexpr uint32_t kCount = 8;
  DWConvMicrokernelTester()
    .cr(kCount)
    .kr(9)
    .channels(kCount)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
3752 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3763 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmin set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3775 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmax set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3787 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 1 through 7 (strictly below 8).
  uint32_t num_channels = 1;
  while (num_channels < 8) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3798 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15 (strictly between 8 and 16).
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3809 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15, each run with qmin set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3821 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Every channel count from 9 through 15, each run with qmax set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3833 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3845 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36; for each, every step value from
  // 2 through 9 is run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    size_t step_value = 2;
    while (step_value <= 9) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      step_value++;
    }
    num_channels += 7;
  }
}
3860 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Runs a width of 5 with an output stride of 43 for channel counts
  // 1, 8, 15, 22, 29 and 36. The loop variable is forwarded to channels() so
  // that each count is actually exercised (a fixed channels(8) would repeat
  // one configuration six times). The largest count, 36, is below the
  // output stride of 43.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
3873 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmin set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3886 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmax set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
3899 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;

  // Channel counts 16, 40, 64, 88 and 112, each run with an input offset of 176.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .input_offset(176)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3911 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;

  // For every zero_index value 0..8, run channel counts 16, 40, 64, 88 and 112
  // with an input offset of 176.
  uint32_t zero_idx = 0;
  while (zero_idx < 9) {
    uint32_t num_channels = 16;
    while (num_channels < 128) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .input_offset(176)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      num_channels += 24;
    }
    zero_idx++;
  }
}
3926 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3927 
3928 
3929 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;

  // Single run: the channel count equals the cr setting (8).
  constexpr uint32_t kCount = 8;
  DWConvMicrokernelTester()
    .cr(kCount)
    .kr(25)
    .channels(kCount)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
3938 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3949 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmin set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3961 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmax set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
3973 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 1 through 7 (strictly below 8).
  uint32_t num_channels = 1;
  while (num_channels < 8) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3984 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15 (strictly between 8 and 16).
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
3995 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15, each run with qmin set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4007 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15, each run with qmax set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4019 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4031 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36; for each, every step value from
  // 2 through 25 is run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    size_t step_value = 2;
    while (step_value <= 25) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      step_value++;
    }
    num_channels += 7;
  }
}
4046 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Runs a width of 5 with an output stride of 43 for channel counts
  // 1, 8, 15, 22, 29 and 36. The loop variable is forwarded to channels() so
  // that each count is actually exercised (a fixed channels(8) would repeat
  // one configuration six times). The largest count, 36, is below the
  // output stride of 43.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
4059 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmin set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4072 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmax set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4085 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112, each run with an input offset of 176.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .input_offset(176)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
4097 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;

  // For every zero_index value 0..24, run channel counts 16, 40, 64, 88 and
  // 112 with an input offset of 176.
  uint32_t zero_idx = 0;
  while (zero_idx < 25) {
    uint32_t num_channels = 16;
    while (num_channels < 128) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .input_offset(176)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      num_channels += 24;
    }
    zero_idx++;
  }
}
4112 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113 
4114 
4115 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;

  // Single run: the channel count equals the cr setting (8).
  constexpr uint32_t kCount = 8;
  DWConvMicrokernelTester()
    .cr(kCount)
    .kr(25)
    .channels(kCount)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
4124 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
4135 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmin set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
4147 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112 (all multiples of 8), each run
  // with qmax set to 128.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
4159 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 1 through 7 (strictly below 8).
  uint32_t num_channels = 1;
  while (num_channels < 8) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4170 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15 (strictly between 8 and 16).
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4181 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15, each run with qmin set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4193 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Every channel count from 9 through 15, each run with qmax set to 128.
  uint32_t num_channels = 9;
  while (num_channels < 16) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels++;
  }
}
4205 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4217 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36; for each, every step value from
  // 2 through 25 is run with a width of 3.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    size_t step_value = 2;
    while (step_value <= 25) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      step_value++;
    }
    num_channels += 7;
  }
}
4232 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Runs a width of 5 with an output stride of 43 for channel counts
  // 1, 8, 15, 22, 29 and 36. The loop variable is forwarded to channels() so
  // that each count is actually exercised (a fixed channels(8) would repeat
  // one configuration six times). The largest count, 36, is below the
  // output stride of 43.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
4245 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmin set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4258 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 1, 8, 15, 22, 29 and 36, each run with a width of 3 and
  // qmax set to 128.
  size_t num_channels = 1;
  while (num_channels <= 40) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 7;
  }
}
4271 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;

  // Channel counts 16, 40, 64, 88 and 112, each run with an input offset of 176.
  uint32_t num_channels = 16;
  while (num_channels < 128) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(num_channels)
      .input_offset(176)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    num_channels += 24;
  }
}
4283 
// For every zero_index from 0 to 24, runs with input_offset 176 over channel
// counts 16, 40, 64, 88 and 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4298 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4299 
4300 
4301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 8, the same value passed to cr(); kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(8);
  tester.kr(25);
  tester.channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
4310 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4321 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmin set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4333 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmax set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4345 
// Covers every channel count from 1 to 7, i.e. all counts below the cr(8)
// setting; kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4356 
// Covers every channel count from 9 to 15, i.e. counts strictly between 8
// and 16; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4367 
// Covers every channel count from 9 to 15 with qmin set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4379 
// Covers every channel count from 9 to 15 with qmax set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4391 
// Multi-pixel run (width = 3) over channel counts 1, 8, 15, 22, 29 and 36;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4403 
// Multi-pixel run (width = 3) combining channel counts 1, 8, 15, 22, 29, 36
// with every step value from 2 to 25; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.width(3);
      tester.step(s);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4418 
// Multi-pixel run (width = 5) with an output stride of 43 over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations re-ran one identical configuration.
// The stride of 43 stays above the largest channel count exercised (36).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the template needs the same change.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
4431 
// Multi-pixel run (width = 3) with qmin set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4444 
// Multi-pixel run (width = 3) with qmax set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4457 
// Runs with input_offset set to 176 over channel counts 16, 40, 64, 88 and
// 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4469 
// For every zero_index from 0 to 24, runs with input_offset 176 over channel
// counts 16, 40, 64, 88 and 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4484 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4485 
4486 
4487 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 8, the same value passed to cr(); kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(8);
  tester.kr(25);
  tester.channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
4496 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4507 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmin set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4519 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmax set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4531 
// Covers every channel count from 1 to 7, i.e. all counts below the cr(8)
// setting; kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4542 
// Covers every channel count from 9 to 15, i.e. counts strictly between 8
// and 16; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4553 
// Covers every channel count from 9 to 15 with qmin set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4565 
// Covers every channel count from 9 to 15 with qmax set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4577 
// Multi-pixel run (width = 3) over channel counts 1, 8, 15, 22, 29 and 36;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4589 
// Multi-pixel run (width = 3) combining channel counts 1, 8, 15, 22, 29, 36
// with every step value from 2 to 25; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.width(3);
      tester.step(s);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604 
// Multi-pixel run (width = 5) with an output stride of 43 over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations re-ran one identical configuration.
// The stride of 43 stays above the largest channel count exercised (36).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the template needs the same change.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
4617 
// Multi-pixel run (width = 3) with qmin set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4630 
// Multi-pixel run (width = 3) with qmax set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4643 
// Runs with input_offset set to 176 over channel counts 16, 40, 64, 88 and
// 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4655 
// For every zero_index from 0 to 24, runs with input_offset 176 over channel
// counts 16, 40, 64, 88 and 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4670 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671 
4672 
4673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 8, the same value passed to cr(); kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(8);
  tester.kr(25);
  tester.channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
4682 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4693 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmin set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4705 
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8) with
// qmax set to 128; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4717 
// Covers every channel count from 1 to 7, i.e. all counts below the cr(8)
// setting; kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4728 
// Covers every channel count from 9 to 15, i.e. counts strictly between 8
// and 16; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4739 
// Covers every channel count from 9 to 15 with qmin set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4751 
// Covers every channel count from 9 to 15 with qmax set to 128;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4763 
// Multi-pixel run (width = 3) over channel counts 1, 8, 15, 22, 29 and 36;
// cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4775 
// Multi-pixel run (width = 3) combining channel counts 1, 8, 15, 22, 29, 36
// with every step value from 2 to 25; cr = 8, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.width(3);
      tester.step(s);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4790 
// Multi-pixel run (width = 5) with an output stride of 43 over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations re-ran one identical configuration.
// The stride of 43 stays above the largest channel count exercised (36).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the template needs the same change.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
4803 
// Multi-pixel run (width = 3) with qmin set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4816 
// Multi-pixel run (width = 3) with qmax set to 128, over channel counts
// 1, 8, 15, 22, 29 and 36 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4829 
// Runs with input_offset set to 176 over channel counts 16, 40, 64, 88 and
// 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8);
    tester.kr(25);
    tester.channels(c);
    tester.input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
4841 
// For every zero_index from 0 to 24, runs with input_offset 176 over channel
// counts 16, 40, 64, 88 and 112 (cr = 8, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8);
      tester.kr(25);
      tester.channels(c);
      tester.input_offset(176);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4856 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4857 
4858 
4859 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 16, the same value passed to cr(); kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16);
  tester.kr(9);
  tester.channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
4868 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with
// cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4879 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with
// qmin set to 128; cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4891 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with
// qmax set to 128; cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4903 
// Covers every channel count from 1 to 15, i.e. all counts below the cr(16)
// setting; kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4914 
// Covers every channel count from 17 to 31, i.e. counts strictly between 16
// and 32; cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4925 
// Covers every channel count from 17 to 31 with qmin set to 128;
// cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4937 
// Covers every channel count from 17 to 31 with qmax set to 128;
// cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4949 
// Multi-pixel run (width = 3) over channel counts 1, 16, 31, 46, 61 and 76;
// cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16);
    tester.kr(9);
    tester.channels(c);
    tester.width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
4961 
// Multi-pixel run (width = 3) combining channel counts 1, 16, 31, 46, 61, 76
// with every step value from 2 to 9; cr = 16, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16);
      tester.kr(9);
      tester.channels(c);
      tester.width(3);
      tester.step(s);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4976 
// Multi-pixel run (width = 5) with an output stride of 83 over channel counts
// 1, 16, 31, 46, 61 and 76 (cr = 16, kr = 9).
// The loop variable is forwarded to channels(); previously channels(16) was
// hard-coded, so all six iterations re-ran one identical configuration.
// The stride of 83 stays above the largest channel count exercised (76).
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the template needs the same change.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
4989 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5002 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5015 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5027 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  // For each zero_index 0 through 8 (one per kr = 9 position), sweep channel
  // counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5042 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5043 
5044 
5045 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: channel count equal to cr (16), kr = 9.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
5054 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 32, 80, 128, 176, 224: all multiples of cr (16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5065 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Multiples of cr (16) from 32 to 224, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5077 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Multiples of cr (16) from 32 to 224, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5089 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Every channel count below cr (16): 1 through 15.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5100 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 17 through 31 (above cr = 16, below 2 * cr).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5111 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 17 through 31 (above cr = 16), with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5123 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 17 through 31 (above cr = 16), with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5135 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5147 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of channel counts 1, 16, ..., 76 and step values 2 through 9,
  // always with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5162 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 5 and an output
  // stride of 83. Each iteration passes its own channel count to the tester so
  // the sweep covers six distinct configurations; the stride of 83 is larger
  // than the biggest channel count swept (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channels).width(5).output_stride(83).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5175 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5188 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5201 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
5213 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  // For each zero_index 0 through 8 (one per kr = 9 position), sweep channel
  // counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5228 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5229 
5230 
5231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single configuration: channel count equal to cr (16), kr = 9.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
5240 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176, 224: all multiples of cr (16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5251 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5263 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5275 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count below cr (16): 1 through 15.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5286 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16, below 2 * cr).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5297 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5309 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5321 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5333 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // Cross product of channel counts 1, 16, ..., 76 and step values 2 through 9,
  // always with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5348 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 5 and an output
  // stride of 83. Each iteration passes its own channel count to the tester so
  // the sweep covers six distinct configurations; the stride of 83 is larger
  // than the biggest channel count swept (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channels).width(5).output_stride(83).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5361 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5374 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5387 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5399 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index 0 through 8 (one per kr = 9 position), sweep channel
  // counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5414 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5415 
5416 
5417 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single configuration: channel count equal to cr (16), kr = 9.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
5426 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176, 224: all multiples of cr (16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5437 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5449 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5461 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count below cr (16): 1 through 15.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5472 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16, below 2 * cr).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5483 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5495 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5507 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5519 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // Cross product of channel counts 1, 16, ..., 76 and step values 2 through 9,
  // always with width set to 3.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5534 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61, 76 with width set to 5 and an output
  // stride of 83. Each iteration passes its own channel count to the tester so
  // the sweep covers six distinct configurations; the stride of 83 is larger
  // than the biggest channel count swept (76).
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(channels).width(5).output_stride(83).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5547 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5560 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, ..., 76 with width set to 3 and qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5573 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5585 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index 0 through 8 (one per kr = 9 position), sweep channel
  // counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
5600 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5601 
5602 
5603 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single configuration: channel count equal to cr (16), kr = 9.
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
5612 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176, 224: all multiples of cr (16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5623 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5635 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of cr (16) from 32 to 224, with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5647 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count below cr (16): 1 through 15.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5658 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16, below 2 * cr).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5669 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5681 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31 (above cr = 16), with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
5693 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
5705 
// width(3) runs for channel counts 1..76 (stride 15), each crossed with
// step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5720 
// width(5) runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// exercises a distinct channel count; a hard-coded channels(16) would repeat
// one configuration six times. Every swept count stays below the output
// stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or it is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
5733 
// width(3) runs for channel counts 1..76 (stride 15) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
5746 
// width(3) runs for channel counts 1..76 (stride 15) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
5759 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
5771 
// For each zero_index 0..8, sweeps channel counts 32..224 (stride 48) with
// input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5786 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5787 
5788 
5789 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
5798 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5809 
// Multiples-of-16 sweep (32..224, stride 48) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5821 
// Multiples-of-16 sweep (32..224, stride 48) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5833 
// Every channel count from 1 to 15, i.e. below the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5844 
// Every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5855 
// Channel counts 17 through 31 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5867 
// Channel counts 17 through 31 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5879 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5891 
// width(3) runs for channel counts 1..76 (stride 15), each crossed with
// step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5906 
// width(5) runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// exercises a distinct channel count; a hard-coded channels(16) would repeat
// one configuration six times. Every swept count stays below the output
// stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or it is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
5919 
// width(3) runs for channel counts 1..76 (stride 15) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5932 
// width(3) runs for channel counts 1..76 (stride 15) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5945 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5957 
// For each zero_index 0..24, sweeps channel counts 32..224 (stride 48) with
// input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5972 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5973 
5974 
5975 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
5984 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
5995 
// Multiples-of-16 sweep (32..224, stride 48) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6007 
// Multiples-of-16 sweep (32..224, stride 48) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6019 
// Every channel count from 1 to 15, i.e. below the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6030 
// Every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6041 
// Channel counts 17 through 31 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6053 
// Channel counts 17 through 31 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6065 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6077 
// width(3) runs for channel counts 1..76 (stride 15), each crossed with
// step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6092 
// width(5) runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// exercises a distinct channel count; a hard-coded channels(16) would repeat
// one configuration six times. Every swept count stays below the output
// stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or it is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
6105 
// width(3) runs for channel counts 1..76 (stride 15) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6118 
// width(3) runs for channel counts 1..76 (stride 15) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6131 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
6143 
// For each zero_index 0..24, sweeps channel counts 32..224 (stride 48) with
// input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6158 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6159 
6160 
6161 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
6170 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6181 
// Multiples-of-16 sweep (32..224, stride 48) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6193 
// Multiples-of-16 sweep (32..224, stride 48) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6205 
// Every channel count from 1 to 15, i.e. below the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6216 
// Every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6227 
// Channel counts 17 through 31 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6239 
// Channel counts 17 through 31 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6251 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6263 
// width(3) runs for channel counts 1..76 (stride 15), each crossed with
// step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6278 
// width(5) runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels() so that each iteration
// exercises a distinct channel count; a hard-coded channels(16) would repeat
// one configuration six times. Every swept count stays below the output
// stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or it is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
6291 
// width(3) runs for channel counts 1..76 (stride 15) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6304 
// width(3) runs for channel counts 1..76 (stride 15) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6317 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6329 
// For each zero_index 0..24, sweeps channel counts 32..224 (stride 48) with
// input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6344 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6345 
6346 
6347 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), equal to the cr(16) setting.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
6356 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6367 
// Multiples-of-16 sweep (32..224, stride 48) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6379 
// Multiples-of-16 sweep (32..224, stride 48) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
6391 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 1 through 15, i.e. strictly below 16.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6402 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 17 through 31, i.e. strictly between 16 and 32.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6413 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31, with qmin(128).
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6425 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31, with qmax(128).
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6437 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6449 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 crossed with step values 2 through 25, at width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6464 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(5) with output_stride(83).
  // The loop variable is now passed to channels(); previously channels(16) was
  // hard-coded, so all six iterations repeated one identical configuration.
  // The largest channel count (76) stays below the output stride of 83.
  // NOTE: this file is auto-generated; the same change belongs in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
6477 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(3), with qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6490 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(3), with qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6503 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224, with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6515 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index 0 through 24, sweep channel counts 32, 80, 128, 176
  // and 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6530 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6531 
6532 
6533 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with exactly 16 channels, equal to the cr(16) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
6542 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224: all multiples of 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6553 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224 (multiples of 16), with qmin(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6565 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224 (multiples of 16), with qmax(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6577 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 1 through 15, i.e. strictly below 16.
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6588 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 17 through 31, i.e. strictly between 16 and 32.
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6599 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31, with qmin(128).
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6611 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 17 through 31, with qmax(128).
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6623 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6635 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 crossed with step values 2 through 25, at width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6650 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(5) with output_stride(83).
  // The loop variable is now passed to channels(); previously channels(16) was
  // hard-coded, so all six iterations repeated one identical configuration.
  // The largest channel count (76) stays below the output stride of 83.
  // NOTE: this file is auto-generated; the same change belongs in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
6663 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(3), with qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6676 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 16, 31, 46, 61 and 76 at width(3), with qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6689 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224, with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6701 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index 0 through 24, sweep channel counts 32, 80, 128, 176
  // and 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6716 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6717 
6718 
6719 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with exactly 24 channels, equal to the cr(24) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9).channels(24).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
    xnn_init_qs8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
6728 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 48, 120, 192, 264 and 336: all multiples of 24.
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6739 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24), with qmin(128).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6751 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24), with qmax(128).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6763 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every channel count from 1 through 23, i.e. strictly below 24.
  for (uint32_t c = 1; c < 24; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6774 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6785 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 25 through 47, with qmin(128).
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6797 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 25 through 47, with qmax(128).
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6809 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 24, 47, 70, 93 and 116, each run with width(3).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6821 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 24, 47, 70, 93 and 116 crossed with step values 2 through 9, at width(3).
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6836 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(5) with output_stride(127).
  // The loop variable is now passed to channels(); previously channels(24) was
  // hard-coded, so all six iterations repeated one identical configuration.
  // The largest channel count (116) stays below the output stride of 127.
  // NOTE: this file is auto-generated; the same change belongs in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
6849 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(3), with qmin(128).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6862 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(3), with qmax(128).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6875 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Channel counts 48, 120, 192, 264 and 336, with input_offset(464).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).input_offset(464).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
6887 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  // For each zero_index 0 through 8, sweep channel counts 48, 120, 192, 264
  // and 336 with input_offset(464).
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
6902 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6903 
6904 
6905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with exactly 24 channels, equal to the cr(24) setting.
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9).channels(24).Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
6914 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 48, 120, 192, 264 and 336: all multiples of 24.
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6925 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24), with qmin(128).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6937 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 48, 120, 192, 264 and 336 (multiples of 24), with qmax(128).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6949 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 1 through 23, i.e. strictly below 24.
  for (uint32_t c = 1; c < 24; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6960 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6971 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 25 through 47, with qmin(128).
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6983 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 25 through 47, with qmax(128).
  for (uint32_t c = 25; c < 48; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
6995 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 24, 47, 70, 93 and 116, each run with width(3).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
7007 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 24, 47, 70, 93 and 116 crossed with step values 2 through 9, at width(3).
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(c).width(3).step(s).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
7022 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(5) with output_stride(127).
  // The loop variable is now passed to channels(); previously channels(24) was
  // hard-coded, so all six iterations repeated one identical configuration.
  // The largest channel count (116) stays below the output stride of 127.
  // NOTE: this file is auto-generated; the same change belongs in
  // tools/generate-dwconv-test.py or it will be lost on regeneration.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
7035 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(3), with qmin(128).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).qmin(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
7048 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 1, 24, 47, 70, 93 and 116 at width(3), with qmax(128).
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3).qmax(128).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
7061 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 48, 120, 192, 264 and 336, with input_offset(464).
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).input_offset(464).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
7073 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index 0 through 8, sweep channel counts 48, 120, 192, 264
  // and 336 with input_offset(464).
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(c).input_offset(464).zero_index(zi).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
7088 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7089 
7090 
7091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr is 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
7100 
// Sweeps channel counts 48, 120, 192, 264 and 336, all multiples of cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7111 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7123 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7135 
// Covers every channel count from 1 through 23, i.e. below cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7146 
// Covers every channel count from 25 through 47, i.e. above cr(24) and below
// twice that value.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7157 
// Covers every channel count from 25 through 47 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7169 
// Covers every channel count from 25 through 47 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7181 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7193 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116, each combined
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7208 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93
// and 116. The loop variable is forwarded to channels() so that each iteration
// exercises a different channel count; a fixed channels(24) here would repeat
// one identical configuration six times. Every swept count is below the
// output stride of 127.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
7221 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7234 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7247 
// Sweeps channel counts 48, 120, 192, 264 and 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7259 
// For every zero_index in [0, 9), sweeps channel counts 48, 120, 192, 264 and
// 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7274 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7275 
7276 
7277 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr is 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
7286 
// Sweeps channel counts 48, 120, 192, 264 and 336, all multiples of cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7297 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7309 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7321 
// Covers every channel count from 1 through 23, i.e. below cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7332 
// Covers every channel count from 25 through 47, i.e. above cr(24) and below
// twice that value.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7343 
// Covers every channel count from 25 through 47 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7355 
// Covers every channel count from 25 through 47 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7367 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7379 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116, each combined
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7394 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93
// and 116. The loop variable is forwarded to channels() so that each iteration
// exercises a different channel count; a fixed channels(24) here would repeat
// one identical configuration six times. Every swept count is below the
// output stride of 127.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
7407 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7420 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7433 
// Sweeps channel counts 48, 120, 192, 264 and 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
7445 
// For every zero_index in [0, 25), sweeps channel counts 48, 120, 192, 264 and
// 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7460 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7461 
7462 
7463 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr is 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
7472 
// Sweeps channel counts 48, 120, 192, 264 and 336, all multiples of cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7483 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7495 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7507 
// Covers every channel count from 1 through 23, i.e. below cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7518 
// Covers every channel count from 25 through 47, i.e. above cr(24) and below
// twice that value.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7529 
// Covers every channel count from 25 through 47 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7541 
// Covers every channel count from 25 through 47 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7553 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7565 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116, each combined
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7580 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93
// and 116. The loop variable is forwarded to channels() so that each iteration
// exercises a different channel count; a fixed channels(24) here would repeat
// one identical configuration six times. Every swept count is below the
// output stride of 127.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
7593 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7606 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7619 
// Sweeps channel counts 48, 120, 192, 264 and 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7631 
// For every zero_index in [0, 25), sweeps channel counts 48, 120, 192, 264 and
// 336 with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7646 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7647 
7648 
7649 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr is 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
7658 
// Sweeps channel counts 48, 120, 192, 264 and 336, all multiples of cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7669 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7681 
// Sweeps channel counts 48, 120, 192, 264 and 336 (multiples of 24) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7693 
// Covers every channel count from 1 through 23, i.e. below cr(24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7704 
// Covers every channel count from 25 through 47, i.e. above cr(24) and below
// twice that value.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7715 
// Covers every channel count from 25 through 47 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7727 
// Covers every channel count from 25 through 47 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7739 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7751 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116, each combined
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7766 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93
// and 116. The loop variable is forwarded to channels() so that each iteration
// exercises a different channel count; a fixed channels(24) here would repeat
// one identical configuration six times. Every swept count is below the
// output stride of 127.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
7779 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7792 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7805 
// Channel counts 48, 120, 192, 264, 336 with input_offset(464) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7817 
// For every zero_index from 0 through 24, runs channel counts
// 48, 120, 192, 264, 336 with input_offset(464) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7832 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7833 
7834 
7835 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
7844 
// Channel counts 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7855 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7867 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7879 
// Every channel count below 8 (1 through 7).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7890 
// Every channel count strictly between 8 and 16 (9 through 15).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7901 
// Channel counts 9 through 15 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7913 
// Channel counts 9 through 15 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7925 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7937 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_value).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7952 
// Width-5 runs written with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); the previous code hard-coded
// channels(8), so every iteration executed the exact same configuration.
// The largest channel count in the sweep (36) stays below the stride (43).
// NOTE(review): this file is generated (see the file header); mirror this
// change in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
7965 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7978 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
7991 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8003 
// For every zero_index from 0 through 8, runs channel counts
// 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8018 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8019 
8020 
8021 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
8030 
// Channel counts 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8041 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8053 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8065 
// Every channel count below 8 (1 through 7).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8076 
// Every channel count strictly between 8 and 16 (9 through 15).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8087 
// Channel counts 9 through 15 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8099 
// Channel counts 9 through 15 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8111 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8123 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_value).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8138 
// Width-5 runs written with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); the previous code hard-coded
// channels(8), so every iteration executed the exact same configuration.
// The largest channel count in the sweep (36) stays below the stride (43).
// NOTE(review): this file is generated (see the file header); mirror this
// change in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
8151 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8164 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8177 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8189 
// For every zero_index from 0 through 8, runs channel counts
// 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8204 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8205 
8206 
8207 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
8216 
// Channel counts 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8227 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8239 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8251 
// Every channel count below 8 (1 through 7).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8262 
// Every channel count strictly between 8 and 16 (9 through 15).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8273 
// Channel counts 9 through 15 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8285 
// Channel counts 9 through 15 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8297 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8309 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_value).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8324 
// Width-5 runs written with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); the previous code hard-coded
// channels(8), so every iteration executed the exact same configuration.
// The largest channel count in the sweep (36) stays below the stride (43).
// NOTE(review): this file is generated (see the file header); mirror this
// change in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
8337 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8350 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8363 
// Channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8375 
// For every zero_index from 0 through 8, runs channel counts
// 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8390 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8391 
8392 
8393 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
8402 
// Channel counts 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8413 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8425 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8437 
// Every channel count below 8 (1 through 7).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8448 
// Every channel count strictly between 8 and 16 (9 through 15).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8459 
// Channel counts 9 through 15 with qmin(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8471 
// Channel counts 9 through 15 with qmax(128) set.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8483 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8495 
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, each repeated for
// every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_value).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8510 
// Width-5 runs written with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
//
// The loop variable is forwarded to channels(); the previous code hard-coded
// channels(8), so every iteration executed the exact same configuration.
// The largest channel count in the sweep (36) stays below the stride (43).
// NOTE(review): this file is generated (see the file header); mirror this
// change in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
8523 
// up8x9 AVX2 MUL32: width(3) rows with qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8536 
// up8x9 AVX2 MUL32: width(3) rows with qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8549 
// up8x9 AVX2 MUL32: input_offset(176) for channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
8561 
// up8x9 AVX2 MUL32: every zero_index from 0 through 8, each with
// input_offset(176) and channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8576 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8577 
8578 
8579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x9 XOP MUL16_ADD16: single case with exactly 8 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
8588 
// up8x9 XOP MUL16_ADD16: channel counts 16, 40, 64, 88, 112 (multiples of 8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8599 
// up8x9 XOP MUL16_ADD16: channel counts 16, 40, 64, 88, 112 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8611 
// up8x9 XOP MUL16_ADD16: channel counts 16, 40, 64, 88, 112 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8623 
// up8x9 XOP MUL16_ADD16: channel counts 1 through 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8634 
// up8x9 XOP MUL16_ADD16: channel counts 9 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8645 
// up8x9 XOP MUL16_ADD16: channel counts 9 through 15, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8657 
// up8x9 XOP MUL16_ADD16: channel counts 9 through 15, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8669 
// up8x9 XOP MUL16_ADD16: width(3) rows for channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8681 
// up8x9 XOP MUL16_ADD16: width(3) rows for channel counts 1, 8, ..., 36, each
// combined with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8696 
// up8x9 XOP MUL16_ADD16: width(5) rows with output_stride(43), swept over
// channel counts 1, 8, 15, 22, 29, 36 (all below the stride of 43).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) left it unused, so every iteration repeated the same case.
// NOTE: this file is generated; mirror the change in
// tools/generate-dwconv-test.py so regeneration does not undo it.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
8709 
// up8x9 XOP MUL16_ADD16: width(3) rows with qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8722 
// up8x9 XOP MUL16_ADD16: width(3) rows with qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8735 
// up8x9 XOP MUL16_ADD16: input_offset(176) for channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8747 
// up8x9 XOP MUL16_ADD16: every zero_index from 0 through 8, each with
// input_offset(176) and channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8762 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8763 
8764 
8765 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x9 XOP MUL32: single case with exactly 8 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
8774 
// up8x9 XOP MUL32: channel counts 16, 40, 64, 88, 112 (multiples of 8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8785 
// up8x9 XOP MUL32: channel counts 16, 40, 64, 88, 112 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8797 
// up8x9 XOP MUL32: channel counts 16, 40, 64, 88, 112 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8809 
// up8x9 XOP MUL32: channel counts 1 through 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8820 
// up8x9 XOP MUL32: channel counts 9 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8831 
// up8x9 XOP MUL32: channel counts 9 through 15, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8843 
// up8x9 XOP MUL32: channel counts 9 through 15, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8855 
// up8x9 XOP MUL32: width(3) rows for channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8867 
// up8x9 XOP MUL32: width(3) rows for channel counts 1, 8, ..., 36, each
// combined with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8882 
// up8x9 XOP MUL32: width(5) rows with output_stride(43), swept over channel
// counts 1, 8, 15, 22, 29, 36 (all below the stride of 43).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) left it unused, so every iteration repeated the same case.
// NOTE: this file is generated; mirror the change in
// tools/generate-dwconv-test.py so regeneration does not undo it.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
8895 
// up8x9 XOP MUL32: width(3) rows with qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8908 
// up8x9 XOP MUL32: width(3) rows with qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8921 
// up8x9 XOP MUL32: input_offset(176) for channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8933 
// up8x9 XOP MUL32: every zero_index from 0 through 8, each with
// input_offset(176) and channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8948 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8949 
8950 
8951 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x25 AVX MUL16: single case with exactly 8 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
8960 
// up8x25 AVX MUL16: channel counts 16, 40, 64, 88, 112 (multiples of 8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8971 
// up8x25 AVX MUL16: channel counts 16, 40, 64, 88, 112 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8983 
// up8x25 AVX MUL16: channel counts 16, 40, 64, 88, 112 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
8995 
// up8x25 AVX MUL16: channel counts 1 through 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9006 
// up8x25 AVX MUL16: channel counts 9 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9017 
// up8x25 AVX MUL16: channel counts 9 through 15, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9029 
// up8x25 AVX MUL16: channel counts 9 through 15, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9041 
// up8x25 AVX MUL16: width(3) rows for channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9053 
// up8x25 AVX MUL16: width(3) rows for channel counts 1, 8, ..., 36, each
// combined with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9068 
// up8x25 AVX MUL16: width(5) rows with output_stride(43), swept over channel
// counts 1, 8, 15, 22, 29, 36 (all below the stride of 43).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) left it unused, so every iteration repeated the same case.
// NOTE: this file is generated; mirror the change in
// tools/generate-dwconv-test.py so regeneration does not undo it.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9081 
// up8x25 AVX MUL16: width(3) rows with qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9094 
// up8x25 AVX MUL16: width(3) rows with qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9107 
// up8x25 AVX MUL16: input_offset(176) for channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).input_offset(176);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9119 
// up8x25 AVX MUL16: every zero_index from 0 through 24, each with
// input_offset(176) and channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester{};
      tester.cr(8).kr(25).channels(num_channels).input_offset(176).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9134 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9135 
9136 
9137 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x25 AVX MUL16_ADD16: single case with exactly 8 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(25).channels(8);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
9146 
// up8x25 AVX MUL16_ADD16: channel counts 16, 40, 64, 88, 112 (multiples of 8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9157 
// up8x25 AVX MUL16_ADD16: channel counts 16, 40, 64, 88, 112 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9169 
// up8x25 AVX MUL16_ADD16: channel counts 16, 40, 64, 88, 112 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9181 
// up8x25 AVX MUL16_ADD16: channel counts 1 through 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9192 
// up8x25 AVX MUL16_ADD16: channel counts 9 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9203 
// up8x25 AVX MUL16_ADD16: channel counts 9 through 15, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9215 
// up8x25 AVX MUL16_ADD16: channel counts 9 through 15, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9227 
// up8x25 AVX MUL16_ADD16: width(3) rows for channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9239 
// For channels in 1, 8, 15, 22, 29, 36, tries every step from 2 to 25 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9254 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a hard-coded channels(8) would repeat one identical
// configuration six times. Every swept value stays below the output stride of 43.
// NOTE(review): this file is generated; the same change presumably belongs in
// the generator template as well -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9267 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9280 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9293 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9305 
// For each zero_index 0..24, sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9320 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9321 
9322 
9323 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
9332 
// Sweeps channels over 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9343 
// Sweeps channels over 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9355 
// Sweeps channels over 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9367 
// Sweeps channels over 1..7, i.e. every count below 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9378 
// Sweeps channels over 9..15, i.e. every count strictly between 8 and 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9389 
// Sweeps channels over 9..15 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9401 
// Sweeps channels over 9..15 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9413 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9425 
// For channels in 1, 8, 15, 22, 29, 36, tries every step from 2 to 25 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9440 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a hard-coded channels(8) would repeat one identical
// configuration six times. Every swept value stays below the output stride of 43.
// NOTE(review): this file is generated; the same change presumably belongs in
// the generator template as well -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9453 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9466 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9479 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9491 
// For each zero_index 0..24, sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9506 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9507 
9508 
9509 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
9518 
// Sweeps channels over 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9529 
// Sweeps channels over 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9541 
// Sweeps channels over 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9553 
// Sweeps channels over 1..7, i.e. every count below 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9564 
// Sweeps channels over 9..15, i.e. every count strictly between 8 and 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9575 
// Sweeps channels over 9..15 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9587 
// Sweeps channels over 9..15 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9599 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9611 
// For channels in 1, 8, 15, 22, 29, 36, tries every step from 2 to 25 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9626 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a hard-coded channels(8) would repeat one identical
// configuration six times. Every swept value stays below the output stride of 43.
// NOTE(review): this file is generated; the same change presumably belongs in
// the generator template as well -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9639 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9652 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9665 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
9677 
// For each zero_index 0..24, sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
9692 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9693 
9694 
9695 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
9704 
// Sweeps channels over 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9715 
// Sweeps channels over 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9727 
// Sweeps channels over 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9739 
// Sweeps channels over 1..7, i.e. every count below 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9750 
// Sweeps channels over 9..15, i.e. every count strictly between 8 and 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9761 
// Sweeps channels over 9..15 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9773 
// Sweeps channels over 9..15 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9785 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9797 
// For channels in 1, 8, 15, 22, 29, 36, tries every step from 2 to 25 with width(3).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9812 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and output_stride(43).
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a hard-coded channels(8) would repeat one identical
// configuration six times. Every swept value stays below the output stride of 43.
// NOTE(review): this file is generated; the same change presumably belongs in
// the generator template as well -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9825 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9838 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9851 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9863 
// For each zero_index 0..24, sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
9878 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9879 
9880 
9881 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
9890 
// Sweeps channels over 16, 40, 64, 88, 112 -- all multiples of 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9901 
// Sweeps channels over 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9913 
// Sweeps channels over 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9925 
// Sweeps channels over 1..7, i.e. every count below 8.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9936 
// Sweeps channels over 9..15, i.e. every count strictly between 8 and 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9947 
// Channel counts 9 through 15, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9959 
// Channel counts 9 through 15, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9971 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9983 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, each combined
// with every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9998 
// Width-5 runs with an explicit output stride of 43, swept over channel
// counts 1, 8, 15, 22, 29 and 36.
//
// The loop variable is forwarded to channels(). Before, every iteration
// passed the literal 8, so the loop repeated one identical configuration six
// times and no other channel count was ever paired with output_stride().
// The stride (43) is larger than the biggest channel count in the sweep (36).
//
// NOTE(review): this file is marked auto-generated
// (tools/generate-dwconv-test.py); the generator template needs the same
// change or this fix is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10011 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, with the
// tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10024 
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36, with the
// tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10037 
// Channel counts 16, 40, 64, 88 and 112, with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10049 
// For each zero_index from 0 through 24, runs channel counts 16, 40, 64, 88
// and 112 with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10064 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10065 
10066 
10067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 16 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
10076 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10087 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10099 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10111 
// Every channel count below 16, i.e. 1 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10122 
// Every channel count strictly between 16 and 32, i.e. 17 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10133 
// Channel counts 17 through 31, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10145 
// Channel counts 17 through 31, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10157 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10169 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10184 
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is forwarded to channels(). Before, every iteration
// passed the literal 16, so the loop repeated one identical configuration six
// times and no other channel count was ever paired with output_stride().
// The stride (83) is larger than the biggest channel count in the sweep (76).
//
// NOTE(review): this file is marked auto-generated
// (tools/generate-dwconv-test.py); the generator template needs the same
// change or this fix is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10197 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10210 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10223 
// Channel counts 32, 80, 128, 176 and 224, with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10235 
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10250 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10251 
10252 
10253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 16 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
10262 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10273 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10285 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10297 
// Every channel count below 16, i.e. 1 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10308 
// Every channel count strictly between 16 and 32, i.e. 17 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10319 
// Channel counts 17 through 31, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10331 
// Channel counts 17 through 31, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10343 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10355 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10370 
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is forwarded to channels(). Before, every iteration
// passed the literal 16, so the loop repeated one identical configuration six
// times and no other channel count was ever paired with output_stride().
// The stride (83) is larger than the biggest channel count in the sweep (76).
//
// NOTE(review): this file is marked auto-generated
// (tools/generate-dwconv-test.py); the generator template needs the same
// change or this fix is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10383 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10396 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10409 
// Channel counts 32, 80, 128, 176 and 224, with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10421 
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10436 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10437 
10438 
10439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 16 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
10448 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10459 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10471 
// Channel counts 32, 80, 128, 176 and 224, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10483 
// Every channel count below 16, i.e. 1 through 15.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10494 
// Every channel count strictly between 16 and 32, i.e. 17 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10505 
// Channel counts 17 through 31, with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10517 
// Channel counts 17 through 31, with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10529 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10541 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10556 
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is forwarded to channels(). Before, every iteration
// passed the literal 16, so the loop repeated one identical configuration six
// times and no other channel count was ever paired with output_stride().
// The stride (83) is larger than the biggest channel count in the sweep (76).
//
// NOTE(review): this file is marked auto-generated
// (tools/generate-dwconv-test.py); the generator template needs the same
// change or this fix is lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10569 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10582 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, with the
// tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10595 
// Channel counts 32, 80, 128, 176 and 224, with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10607 
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10622 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10623 
10624 
10625 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with exactly 16 channels.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
10634 
// Channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10645 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10657 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10669 
// Tries every channel count from 1 through 15, i.e. all values below cr = 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels <= 15; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10680 
// Tries every channel count from 17 through 31, i.e. above cr = 16 and below
// 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10691 
// Tries every channel count from 17 through 31 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10703 
// Tries every channel count from 17 through 31 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10715 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10727 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step < 10; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10742 
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The channel count follows the loop variable; a fixed value of 16
// here would make all six iterations exercise one identical configuration.
// 83 is larger than the largest swept channel count (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
10755 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to
// 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10768 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to
// 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10781 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10793 
// For each zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
// NOTE(review): zero_index presumably selects which of the kr = 9 input rows
// refers to the zero buffer -- confirm in DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot <= 8; ++zero_slot) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 16;
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_slot)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10808 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10809 
10810 
10811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with 16 channels, the same value passed as cr; kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const uint32_t tile_channels = 16;
  DWConvMicrokernelTester()
    .cr(tile_channels)
    .kr(9)
    .channels(tile_channels)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
10820 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all whole multiples of
// cr = 16) with kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10831 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10843 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10855 
// Tries every channel count from 1 through 15, i.e. all values below cr = 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels <= 15; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10866 
// Tries every channel count from 17 through 31, i.e. above cr = 16 and below
// 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10877 
// Tries every channel count from 17 through 31 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10889 
// Tries every channel count from 17 through 31 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10901 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10913 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step < 10; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10928 
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The channel count follows the loop variable; a fixed value of 16
// here would make all six iterations exercise one identical configuration.
// 83 is larger than the largest swept channel count (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
10941 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to
// 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10954 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to
// 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10967 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
10979 
// For each zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
// NOTE(review): zero_index presumably selects which of the kr = 9 input rows
// refers to the zero buffer -- confirm in DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot <= 8; ++zero_slot) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 16;
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_slot)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10994 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10995 
10996 
10997 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with 16 channels, the same value passed as cr; kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const uint32_t tile_channels = 16;
  DWConvMicrokernelTester()
    .cr(tile_channels)
    .kr(9)
    .channels(tile_channels)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
11006 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all whole multiples of
// cr = 16) with kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11017 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11029 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11041 
// Tries every channel count from 1 through 15, i.e. all values below cr = 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels <= 15; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11052 
// Tries every channel count from 17 through 31, i.e. above cr = 16 and below
// 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11063 
// Tries every channel count from 17 through 31 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11075 
// Tries every channel count from 17 through 31 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11087 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11099 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step < 10; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11114 
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The channel count follows the loop variable; a fixed value of 16
// here would make all six iterations exercise one identical configuration.
// 83 is larger than the largest swept channel count (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
11127 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to
// 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11140 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to
// 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11153 
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11165 
// For each zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
// NOTE(review): zero_index presumably selects which of the kr = 9 input rows
// refers to the zero buffer -- confirm in DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot <= 8; ++zero_slot) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 16;
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_slot)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11180 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11181 
11182 
11183 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with 16 channels, the same value passed as cr; kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const uint32_t tile_channels = 16;
  DWConvMicrokernelTester()
    .cr(tile_channels)
    .kr(9)
    .channels(tile_channels)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
11192 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all whole multiples of
// cr = 16) with kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11203 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11215 
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 16;
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11227 
// Tries every channel count from 1 through 15, i.e. all values below cr = 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels <= 15; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11238 
// Tries every channel count from 17 through 31, i.e. above cr = 16 and below
// 2 * cr.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11249 
// Tries every channel count from 17 through 31 with qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11261 
// Tries every channel count from 17 through 31 with qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11273 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11285 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// with step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step < 10; ++pixel_step) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11300 
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The channel count follows the loop variable; a fixed value of 16
// here would make all six iterations exercise one identical configuration.
// 83 is larger than the largest swept channel count (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
11313 
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to
// 128.
// NOTE(review): qmin presumably raises the lower output clamp -- confirm in
// DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11326 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11339 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
11351 
// For every zero_index in [0, 9) (9 is the kr value), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11366 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11367 
11368 
11369 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11378 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11389 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11401 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11413 
// Every channel count from 1 through 15, i.e. below the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11424 
// Every channel count from 17 through 31, i.e. just above the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11435 
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11447 
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11459 
// Channel counts 1, 16, 31, 46, 61 and 76 with the tester width set to 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11471 
// Width-3 runs for channel counts 1, 16, ..., 76 combined with every step
// value from 2 through 9 (the kr value).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11486 
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76 (every value stays below the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) ignored it, so all six iterations repeated one configuration.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); mirror
// the change in the generator template or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11499 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11512 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11525 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11537 
// For every zero_index in [0, 9) (9 is the kr value), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11552 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11553 
11554 
11555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11564 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11575 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11587 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11599 
// Every channel count from 1 through 15, i.e. below the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11610 
// Every channel count from 17 through 31, i.e. just above the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11621 
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11633 
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11645 
// Channel counts 1, 16, 31, 46, 61 and 76 with the tester width set to 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11657 
// Width-3 runs for channel counts 1, 16, ..., 76 combined with every step
// value from 2 through 9 (the kr value).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11672 
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76 (every value stays below the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) ignored it, so all six iterations repeated one configuration.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); mirror
// the change in the generator template or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11685 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11698 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11711 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11723 
// For every zero_index in [0, 9) (9 is the kr value), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11739 
11740 
11741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11750 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11761 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11773 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11785 
// Every channel count from 1 through 15, i.e. below the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11796 
// Every channel count from 17 through 31, i.e. just above the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11807 
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11819 
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11831 
// Channel counts 1, 16, 31, 46, 61 and 76 with the tester width set to 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11843 
// Width-3 runs for channel counts 1, 16, ..., 76 combined with every step
// value from 2 through 25 (the kr value).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11858 
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76 (every value stays below the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) ignored it, so all six iterations repeated one configuration.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); mirror
// the change in the generator template or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11871 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11884 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11897 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11909 
// For every zero_index in [0, 25) (25 is the kr value), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11924 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11925 
11926 
11927 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11936 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11947 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11959 
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11971 
// Every channel count from 1 through 15, i.e. below the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11982 
// Every channel count from 17 through 31, i.e. just above the cr value (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11993 
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12005 
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12017 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12029 
// width(3) runs: for each channel count in {1, 16, 31, 46, 61, 76}, tries
// every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12044 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. channels() takes the loop variable so each iteration covers a
// different channel count; the largest swept count (76) stays below the
// output stride of 83.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12057 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12070 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12083 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12095 
// For each zero_index from 0 through 24, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12110 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12111 
12112 
12113 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels(16) with cr(16) and kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12122 
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12133 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12145 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12157 
// Sweeps every channel count from 1 through 15 (all below 16) with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12168 
// Sweeps every channel count from 17 through 31 with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12179 
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12191 
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12203 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12215 
// width(3) runs: for each channel count in {1, 16, 31, 46, 61, 76}, tries
// every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12230 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. channels() takes the loop variable so each iteration covers a
// different channel count; the largest swept count (76) stays below the
// output stride of 83.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12243 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12256 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12269 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12281 
// For each zero_index from 0 through 24, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12297 
12298 
12299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels(16) with cr(16) and kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
12308 
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12319 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12331 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12343 
// Sweeps every channel count from 1 through 15 (all below 16) with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12354 
// Sweeps every channel count from 17 through 31 with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12365 
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12377 
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12389 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12401 
// width(3) runs: for each channel count in {1, 16, 31, 46, 61, 76}, tries
// every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12416 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. channels() takes the loop variable so each iteration covers a
// different channel count; the largest swept count (76) stays below the
// output stride of 83.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
12429 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12442 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12455 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12467 
// For each zero_index from 0 through 24, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12482 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12483 
12484 
12485 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels(16) with cr(16) and kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
12494 
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12505 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12517 
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12529 
// Sweeps every channel count from 1 through 15 (all below 16) with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12540 
// Sweeps every channel count from 17 through 31 with cr(16), kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12551 
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12563 
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12575 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12587 
// width(3) runs: for each channel count in {1, 16, 31, 46, 61, 76}, tries
// every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12602 
// width(5) runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76. channels() takes the loop variable so each iteration covers a
// different channel count; the largest swept count (76) stays below the
// output stride of 83.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
12615 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12628 
// width(3) runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12641 
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12653 
// For each zero_index from 0 through 24, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12668 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12669 
12670 
12671 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels(16) with cr(16) and kr(25).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
12680 
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
12691 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12703 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12715 
// Every channel count from 1 through 15, i.e. strictly below 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12726 
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12737 
// Channel counts 17 through 31, each run with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12749 
// Channel counts 17 through 31, each run with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12761 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12773 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12788 
// width(5) runs with output_stride(83) across channel counts 1, 16, 31, 46,
// 61 and 76. The loop's channel count is passed to channels() so that each
// iteration exercises a distinct configuration; every count visited is
// smaller than the 83 given to output_stride().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
12801 
// width(3) runs with qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12814 
// width(3) runs with qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12827 
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12839 
// For every zero_index in [0, 25), repeats the input_offset(304) sweep over
// channel counts 32, 80, 128, 176 and 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12854 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12855 
12856 
12857 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels set to 16, the same value given to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
    xnn_init_qs8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
12866 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12877 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12889 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12901 
// Every channel count from 1 through 15, i.e. strictly below 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12912 
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12923 
// Channel counts 17 through 31, each run with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12935 
// Channel counts 17 through 31, each run with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12947 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
12959 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12974 
// width(5) runs with output_stride(83) across channel counts 1, 16, 31, 46,
// 61 and 76. The loop's channel count is passed to channels() so that each
// iteration exercises a distinct configuration; every count visited is
// smaller than the 83 given to output_stride().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
12987 
// width(3) runs with qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
13000 
// width(3) runs with qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
13013 
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
13025 
// For every zero_index in [0, 25), repeats the input_offset(304) sweep over
// channel counts 32, 80, 128, 176 and 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13041 
13042 
13043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels set to 16, the same value given to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
13052 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13063 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13075 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13087 
// Every channel count from 1 through 15, i.e. strictly below 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13098 
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13109 
// Channel counts 17 through 31, each run with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13121 
// Channel counts 17 through 31, each run with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13133 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13145 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13160 
// width(5) runs with output_stride(83) across channel counts 1, 16, 31, 46,
// 61 and 76. The loop's channel count is passed to channels() so that each
// iteration exercises a distinct configuration; every count visited is
// smaller than the 83 given to output_stride().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13173 
// width(3) runs with qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13186 
// width(3) runs with qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13199 
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13211 
// For every zero_index in [0, 25), repeats the input_offset(304) sweep over
// channel counts 32, 80, 128, 176 and 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13226 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13227 
13228 
13229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels set to 16, the same value given to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
13238 
// Channel counts 32, 80, 128, 176 and 224: each a multiple of 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13249 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13261 
// Multiple-of-16 channel sweep (32..224, stride 48) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13273 
// Every channel count from 1 through 15, i.e. strictly below 16.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13284 
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13295 
// Channel counts 17 through 31, each run with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13307 
// Channel counts 17 through 31, each run with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13319 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13331 
// width(3) runs for channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for every step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13346 
// width(5) runs with output_stride(83) across channel counts 1, 16, 31, 46,
// 61 and 76. The loop's channel count is passed to channels() so that each
// iteration exercises a distinct configuration; every count visited is
// smaller than the 83 given to output_stride().
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13359 
// width(3) runs with qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
13372 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Width-3 runs with qmax set to 128 for channel counts 1, 16, ..., 76.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13385 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  // Input offset of 304 for channel counts 32, 80, 128, 176, 224.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13397 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  // Every zero index 0..24 is paired with channel counts 32, 80, 128, 176, 224
  // and an input offset of 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25);
      tester.channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13412 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13413 
13414 
13415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  // Single run with the channel count equal to the cr value (24).
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9);
  tester.channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13424 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336: all multiples of 24.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13435 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13447 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13459 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13470 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13481 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13493 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13505 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 24, 47, 70, 93, 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13517 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 24, ..., 116, each combined with
  // every step value from 2 through 9.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13532 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // Width-5 runs with an explicit output stride of 127 for channel counts
  // 1, 24, 47, 70, 93, 116.
  //
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(24), so all six iterations ran the same configuration.
  // The largest channel count (116) is still smaller than the stride (127).
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13545 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with qmin set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13558 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with qmax set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13571 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Input offset of 464 for channel counts 48, 120, 192, 264, 336.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13583 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  // Every zero index 0..8 is paired with channel counts 48, 120, 192, 264, 336
  // and an input offset of 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13598 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13599 
13600 
13601 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  // Single run with the channel count equal to the cr value (24).
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9);
  tester.channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13610 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336: all multiples of 24.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13621 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13633 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13645 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13656 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13667 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13679 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13691 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 24, 47, 70, 93, 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13703 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 24, ..., 116, each combined with
  // every step value from 2 through 9.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13718 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // Width-5 runs with an explicit output stride of 127 for channel counts
  // 1, 24, 47, 70, 93, 116.
  //
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(24), so all six iterations ran the same configuration.
  // The largest channel count (116) is still smaller than the stride (127).
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13731 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with qmin set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13744 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with qmax set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13757 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Input offset of 464 for channel counts 48, 120, 192, 264, 336.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13769 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  // Every zero index 0..8 is paired with channel counts 48, 120, 192, 264, 336
  // and an input offset of 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13784 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13785 
13786 
13787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with the channel count equal to the cr value (24).
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9);
  tester.channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
13796 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 48, 120, 192, 264, 336: all multiples of 24.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13807 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13819 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13831 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13842 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13853 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13865 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13877 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs for channel counts 1, 24, 47, 70, 93, 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13889 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs for channel counts 1, 24, ..., 116, each combined with
  // every step value from 2 through 9.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels).width(3).step(step_size);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13904 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  // Width-5 runs with an explicit output stride of 127 for channel counts
  // 1, 24, 47, 70, 93, 116.
  //
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(24), so all six iterations ran the same configuration.
  // The largest channel count (116) is still smaller than the stride (127).
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
13917 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmin set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13930 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Width-3 runs with qmax set to 128 for channel counts 1, 24, ..., 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13943 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  // Input offset of 464 for channel counts 48, 120, 192, 264, 336.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
13955 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  // Every zero index 0..8 is paired with channel counts 48, 120, 192, 264, 336
  // and an input offset of 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13970 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13971 
13972 
13973 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  // Single run with the channel count equal to the cr value (24).
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9);
  tester.channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13982 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 48, 120, 192, 264, 336: all multiples of 24.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13993 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14005 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14017 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count from 1 through 23, i.e. all values below 24.
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14028 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  // Every channel count from 25 through 47, i.e. strictly between 24 and 48.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14039 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 25 through 47 with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14051 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  // Channel counts 25 through 47 with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14063 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  // Width-3 runs for channel counts 1, 24, 47, 70, 93, 116.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14075 
// Width-3 runs over channel counts 1, 24, ..., 116, each repeated for step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t px_step = 2; px_step <= 9; ++px_step) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).width(3).step(px_step);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14090 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93 and 116.
// The loop variable is passed to channels(); the earlier form hard-coded 24 channels,
// so all six iterations exercised the same configuration. The largest swept count
// (116) stays below the 127-element output stride.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(5).output_stride(127);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14103 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14116 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14129 
// Channel counts 48, 120, 192, 264 and 336, each run with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14141 
// For each zero_index 0 through 8, runs channel counts 48, 120, ..., 336 with input_offset(464).
// NOTE(review): zero_index presumably selects which kernel tap reads the zero buffer — confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14156 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14157 
14158 
14159 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
14168 
// Channel counts that are multiples of 24: 48, 120, 192, 264 and 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14179 
// Channel counts 48, 120, 192, 264 and 336 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14191 
// Channel counts 48, 120, 192, 264 and 336 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14203 
// Sweeps every channel count from 1 through 23 (below cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14214 
// Sweeps every channel count from 25 through 47 (just above cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14225 
// Sweeps channel counts 25 through 47 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14237 
// Sweeps channel counts 25 through 47 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14249 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14261 
// Width-3 runs over channel counts 1, 24, ..., 116, each repeated for step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t px_step = 2; px_step <= 25; ++px_step) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).width(3).step(px_step);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14276 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93 and 116.
// The loop variable is passed to channels(); the earlier form hard-coded 24 channels,
// so all six iterations exercised the same configuration. The largest swept count
// (116) stays below the 127-element output stride.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(5).output_stride(127);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14289 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14302 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14315 
// Channel counts 48, 120, 192, 264 and 336, each run with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14327 
// For each zero_index 0 through 24, runs channel counts 48, 120, ..., 336 with input_offset(464).
// NOTE(review): zero_index presumably selects which kernel tap reads the zero buffer — confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14342 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14343 
14344 
14345 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
14354 
// Channel counts that are multiples of 24: 48, 120, 192, 264 and 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14365 
// Channel counts 48, 120, 192, 264 and 336 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14377 
// Channel counts 48, 120, 192, 264 and 336 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14389 
// Sweeps every channel count from 1 through 23 (below cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14400 
// Sweeps every channel count from 25 through 47 (just above cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14411 
// Sweeps channel counts 25 through 47 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14423 
// Sweeps channel counts 25 through 47 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14435 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14447 
// Width-3 runs over channel counts 1, 24, ..., 116, each repeated for step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t px_step = 2; px_step <= 25; ++px_step) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).width(3).step(px_step);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14462 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93 and 116.
// The loop variable is passed to channels(); the earlier form hard-coded 24 channels,
// so all six iterations exercised the same configuration. The largest swept count
// (116) stays below the 127-element output stride.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(5).output_stride(127);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14475 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14488 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14501 
// Channel counts 48, 120, 192, 264 and 336, each run with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14513 
// For each zero_index 0 through 24, runs channel counts 48, 120, ..., 336 with input_offset(464).
// NOTE(review): zero_index presumably selects which kernel tap reads the zero buffer — confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14528 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14529 
14530 
14531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
    xnn_init_qs8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
14540 
// Channel counts that are multiples of 24: 48, 120, 192, 264 and 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14551 
// Channel counts 48, 120, 192, 264 and 336 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14563 
// Channel counts 48, 120, 192, 264 and 336 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14575 
// Sweeps every channel count from 1 through 23 (below cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14586 
// Sweeps every channel count from 25 through 47 (just above cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14597 
// Sweeps channel counts 25 through 47 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14609 
// Sweeps channel counts 25 through 47 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14621 
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14633 
// Width-3 runs over channel counts 1, 24, ..., 116, each repeated for step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t px_step = 2; px_step <= 25; ++px_step) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).width(3).step(px_step);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14648 
// Width-5 runs with output_stride(127) over channel counts 1, 24, 47, 70, 93 and 116.
// The loop variable is passed to channels(); the earlier form hard-coded 24 channels,
// so all six iterations exercised the same configuration. The largest swept count
// (116) stays below the 127-element output stride.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(5).output_stride(127);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14661 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14674 
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14687 
// Channel counts 48, 120, 192, 264 and 336, each run with input_offset(464).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
14699 
// For each zero_index 0 through 24, runs channel counts 48, 120, ..., 336 with input_offset(464).
// NOTE(review): zero_index presumably selects which kernel tap reads the zero buffer — confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14714 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14715 
14716 
14717 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (24) and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
    xnn_init_qs8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
14726 
// Channel counts that are multiples of 24: 48, 120, 192, 264 and 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14737 
// Channel counts 48, 120, 192, 264 and 336 with qmin(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14749 
// Channel counts 48, 120, 192, 264 and 336 with qmax(128) applied to the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14761 
// Sweeps every channel count from 1 through 23 (below cr = 24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
14772 
// Covers every channel count from 25 to 47, i.e. strictly between 24 and 48.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14783 
// Covers every channel count from 25 to 47 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14795 
// Covers every channel count from 25 to 47 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14807 
// Runs with width 3 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14819 
// Runs with width 3 for channel counts 1, 24, ..., 116, each combined with every
// step value from 2 to 25 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14834 
// Runs with width 5 and an output stride of 127 for channel counts 1, 24, 47, 70, 93
// and 116. The loop variable is forwarded to channels() so that each iteration covers
// a different channel count instead of repeating the fixed 24-channel configuration.
// Every swept channel count is smaller than the output stride.
// NOTE(review): this file is auto-generated; the generator template presumably needs
// the same change or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14847 
// Runs with width 3 and qmin set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14860 
// Runs with width 3 and qmax set to 128 for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14873 
// Sweeps channel counts 48..336 in steps of 72 with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14885 
// For each zero_index from 0 to 24, sweeps channel counts 48..336 in steps of 72
// with an input offset of 464.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14901 
14902 
14903 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
14912 
// Sweeps channel counts 64, 160, 256, 352 and 448, all multiples of the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14923 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14935 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14947 
// Covers every channel count from 1 to 31, i.e. strictly below the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14958 
// Covers every channel count from 33 to 63, i.e. strictly between 32 and 64.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14969 
// Covers every channel count from 33 to 63 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14981 
// Covers every channel count from 33 to 63 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14993 
// Runs with width 3 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15005 
// Runs with width 3 for channel counts 1, 32, ..., 156, each combined with every
// step value from 2 to 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15020 
// Runs with width 5 and an output stride of 163 for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is forwarded to channels() so that each iteration covers
// a different channel count instead of repeating the fixed 32-channel configuration.
// Every swept channel count is smaller than the output stride.
// NOTE(review): this file is auto-generated; the generator template presumably needs
// the same change or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15033 
// Runs with width 3 and qmin set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15046 
// Runs with width 3 and qmax set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15059 
// Sweeps channel counts 64..448 in steps of 96 with an input offset of 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15071 
// For each zero_index from 0 to 8, sweeps channel counts 64..448 in steps of 96
// with an input offset of 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15086 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15087 
15088 
15089 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
15098 
// Sweeps channel counts 64, 160, 256, 352 and 448, all multiples of the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15109 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15121 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15133 
// Covers every channel count from 1 to 31, i.e. strictly below the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15144 
// Covers every channel count from 33 to 63, i.e. strictly between 32 and 64.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15155 
// Covers every channel count from 33 to 63 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15167 
// Covers every channel count from 33 to 63 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15179 
// Runs with width 3 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15191 
// Runs with width 3 for channel counts 1, 32, ..., 156, each combined with every
// step value from 2 to 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15206 
// Runs with width 5 and an output stride of 163 for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is forwarded to channels() so that each iteration covers
// a different channel count instead of repeating the fixed 32-channel configuration.
// Every swept channel count is smaller than the output stride.
// NOTE(review): this file is auto-generated; the generator template presumably needs
// the same change or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15219 
// Runs with width 3 and qmin set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15232 
// Runs with width 3 and qmax set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15245 
// Sweeps channel counts 64..448 in steps of 96 with an input offset of 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15257 
// For each zero_index from 0 to 8, sweeps channel counts 64..448 in steps of 96
// with an input offset of 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15272 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15273 
15274 
15275 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
15284 
// Sweeps channel counts 64, 160, 256, 352 and 448, all multiples of the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15295 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15307 
// Sweeps channel counts 64..448 in steps of 96 (multiples of 32) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15319 
// Covers every channel count from 1 to 31, i.e. strictly below the cr value (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15330 
// Covers every channel count from 33 to 63, i.e. strictly between 32 and 64.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15341 
// Covers every channel count from 33 to 63 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15353 
// Covers every channel count from 33 to 63 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15365 
// Runs with width 3 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15377 
// Runs with width 3 for channel counts 1, 32, ..., 156, each combined with every
// step value from 2 to 9 inclusive.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15392 
// Runs with width 5 and an output stride of 163 for channel counts 1, 32, 63, 94, 125
// and 156. The loop variable is forwarded to channels() so that each iteration covers
// a different channel count instead of repeating the fixed 32-channel configuration.
// Every swept channel count is smaller than the output stride.
// NOTE(review): this file is auto-generated; the generator template presumably needs
// the same change or it will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15405 
// Runs with width 3 and qmin set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15418 
// Runs with width 3 and qmax set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15431 
// Sweeps channel counts 64..448 in steps of 96 with an input offset of 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15443 
// For each zero_index 0..8 (one per kr = 9), sweeps channel counts
// 64, 160, 256, 352, 448 with input_offset(592).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15459 
15460 
15461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 32 and kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(9).channels(32).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
      xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
15470 
// Channel counts 64, 160, 256, 352, 448: all multiples of cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15481 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15493 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15505 
// Every channel count below cr: 1 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c <= 31; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15516 
// Every channel count strictly between cr and 2 * cr: 33 through 63.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15527 
// Channel counts 33 through 63 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15539 
// Channel counts 33 through 63 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15551 
// width(3) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15563 
// width(3); for each channel count 1, 32, ..., 156 tries step values 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15578 
// width(5) with output_stride(163) across channel counts 1, 32, 63, 94, 125, 156
// (163 is larger than every count visited).
// Fix: the loop variable was previously unused -- channels(32) was hard-coded, so
// all six iterations ran the identical configuration. The iterated count is now
// passed through. This file is generated; the generator template needs the same
// change or the fix will be overwritten on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(channels).width(5).output_stride(163).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15591 
// width(3) and qmin(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15604 
// width(3) and qmax(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15617 
// input_offset(592) across channel counts 64, 160, 256, 352, 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(c).input_offset(592).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15629 
// For each zero_index 0..8 (one per kr = 9), sweeps channel counts
// 64, 160, 256, 352, 448 with input_offset(592).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15644 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15645 
15646 
15647 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 32 and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(25).channels(32).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
15656 
// Channel counts 64, 160, 256, 352, 448: all multiples of cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15667 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15679 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15691 
// Every channel count below cr: 1 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c <= 31; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15702 
// Every channel count strictly between cr and 2 * cr: 33 through 63.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15713 
// Channel counts 33 through 63 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15725 
// Channel counts 33 through 63 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15737 
// width(3) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15749 
// width(3); for each channel count 1, 32, ..., 156 tries step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15764 
// width(5) with output_stride(163) across channel counts 1, 32, 63, 94, 125, 156
// (163 is larger than every count visited).
// Fix: the loop variable was previously unused -- channels(32) was hard-coded, so
// all six iterations ran the identical configuration. The iterated count is now
// passed through. This file is generated; the generator template needs the same
// change or the fix will be overwritten on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(channels).width(5).output_stride(163).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15777 
// width(3) and qmin(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15790 
// width(3) and qmax(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15803 
// input_offset(592) across channel counts 64, 160, 256, 352, 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15815 
// For each zero_index 0..24 (one per kr = 25), sweeps channel counts
// 64, 160, 256, 352, 448 with input_offset(592).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15830 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15831 
15832 
15833 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 32 and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(25).channels(32).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
      xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
15842 
// Channel counts 64, 160, 256, 352, 448: all multiples of cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15853 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15865 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15877 
// Every channel count below cr: 1 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c <= 31; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15888 
// Every channel count strictly between cr and 2 * cr: 33 through 63.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15899 
// Channel counts 33 through 63 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15911 
// Channel counts 33 through 63 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15923 
// width(3) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15935 
// width(3); for each channel count 1, 32, ..., 156 tries step values 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15950 
// width(5) with output_stride(163) across channel counts 1, 32, 63, 94, 125, 156
// (163 is larger than every count visited).
// Fix: the loop variable was previously unused -- channels(32) was hard-coded, so
// all six iterations ran the identical configuration. The iterated count is now
// passed through. This file is generated; the generator template needs the same
// change or the fix will be overwritten on regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(channels).width(5).output_stride(163).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15963 
// width(3) and qmin(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15976 
// width(3) and qmax(128) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15989 
// input_offset(592) across channel counts 64, 160, 256, 352, 448
// (all multiples of cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16001 
// For each zero_index 0..24 (one per kr = 25), sweeps channel counts
// 64, 160, 256, 352, 448 with input_offset(592).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
          xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16016 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16017 
16018 
16019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels == cr == 32 and kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(25).channels(32).Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
16028 
// Channel counts 64, 160, 256, 352, 448: all multiples of cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16039 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16051 
// Multiples of cr (32) from 64 to 448 in steps of 96, with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16063 
// Every channel count below cr: 1 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c <= 31; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16074 
// Every channel count strictly between cr and 2 * cr: 33 through 63.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16085 
// Channel counts 33 through 63 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16097 
// Channel counts 33 through 63 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16109 
// multipixel: width = 3, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16121 
// multipixel_with_step: width = 3; for each channel count 1, 32, ..., 156 tries steps 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
              xnn_init_qs8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16136 
// multipixel_with_output_stride: width = 5 with output_stride = 163, swept over
// channel counts 1, 32, 63, 94, 125, 156 (all below the 163 stride).
// The loop variable is passed to .channels(); the earlier code hard-coded
// .channels(32) and never used the loop variable, so every iteration repeated
// one configuration.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template there presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16149 
// multipixel_with_qmin: width = 3, qmin = 128, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16162 
// multipixel_with_qmax: width = 3, qmax = 128, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16175 
// input_offset: input_offset = 592, channel counts 64, 160, 256, 352, 448.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16187 
// zero: for each zero_index 0 through 24, runs channel counts 64, 160, 256, 352, 448
// with input_offset = 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
              xnn_init_qs8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16202 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16203 
16204 
16205 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_32: single run with channels equal to cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
}
16214 
// c_div_32: channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16225 
// c_div_32_with_qmin: channel counts 64, 160, 256, 352, 448, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16237 
// c_div_32_with_qmax: channel counts 64, 160, 256, 352, 448, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16249 
// c_lt_32: runs the kernel for every channel count from 1 through 31 (all below cr = 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16260 
// c_gt_32: runs the kernel for every channel count from 33 through 63.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16271 
// c_gt_32_with_qmin: channel counts 33 through 63, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16283 
// c_gt_32_with_qmax: channel counts 33 through 63, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16295 
// multipixel: width = 3, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16307 
// multipixel_with_step: width = 3; for each channel count 1, 32, ..., 156 tries steps 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16322 
// multipixel_with_output_stride: width = 5 with output_stride = 163, swept over
// channel counts 1, 32, 63, 94, 125, 156 (all below the 163 stride).
// The loop variable is passed to .channels(); the earlier code hard-coded
// .channels(32) and never used the loop variable, so every iteration repeated
// one configuration.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template there presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
16335 
// multipixel_with_qmin: width = 3, qmin = 128, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16348 
// multipixel_with_qmax: width = 3, qmax = 128, channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16361 
// input_offset: input_offset = 592, channel counts 64, 160, 256, 352, 448.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
16373 
// zero: for each zero_index 0 through 24, runs channel counts 64, 160, 256, 352, 448
// with input_offset = 592.
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16388 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16389 
16390 
16391 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_16: single run with channels equal to cr (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
16400 
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16411 
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16423 
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16435 
// c_lt_16: runs the kernel for every channel count from 1 through 15 (all below cr = 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16446 
// c_gt_16: runs the kernel for every channel count from 17 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16457 
// c_gt_16_with_qmin: channel counts 17 through 31, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16469 
// c_gt_16_with_qmax: channel counts 17 through 31, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16481 
// multipixel: width = 3, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16493 
// multipixel_with_step: width = 3; for each channel count 1, 16, ..., 76 tries steps 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16508 
// multipixel_with_output_stride: width = 5 with output_stride = 83, swept over
// channel counts 1, 16, 31, 46, 61, 76 (all below the 83 stride).
// The loop variable is passed to .channels(); the earlier code hard-coded
// .channels(16) and never used the loop variable, so every iteration repeated
// one configuration.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template there presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
16521 
// multipixel_with_qmin: width = 3, qmin = 128, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16534 
// multipixel_with_qmax: width = 3, qmax = 128, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16547 
// input_offset: input_offset = 304, channel counts 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16559 
// zero: for each zero_index 0 through 8, runs channel counts 32, 80, 128, 176, 224
// with input_offset = 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zero_idx = 0; zero_idx <= 8; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16574 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16575 
16576 
16577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_16: single run with channels equal to cr (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
16586 
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16597 
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16609 
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16621 
// c_lt_16: runs the kernel for every channel count from 1 through 15 (all below cr = 16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16632 
// c_gt_16: runs the kernel for every channel count from 17 through 31.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16643 
// c_gt_16_with_qmin: channel counts 17 through 31, with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16655 
// c_gt_16_with_qmax: channel counts 17 through 31, with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16667 
// multipixel: width = 3, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16679 
// multipixel_with_step: width = 3; for each channel count 1, 16, ..., 76 tries steps 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16694 
// multipixel_with_output_stride: width = 5 with output_stride = 83, swept over
// channel counts 1, 16, 31, 46, 61, 76 (all below the 83 stride).
// The loop variable is passed to .channels(); the earlier code hard-coded
// .channels(16) and never used the loop variable, so every iteration repeated
// one configuration.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template there presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
16707 
// multipixel_with_qmin: width = 3, qmin = 128, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16720 
// multipixel_with_qmax: width = 3, qmax = 128, channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16733 
// input_offset: input_offset = 304, channel counts 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16745 
// zero: for each zero_index 0 through 24, runs channel counts 32, 80, 128, 176, 224
// with input_offset = 304.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16760 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16761 
16762 
16763 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_32: single run with channels equal to cr (32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester().cr(32).kr(9).channels(32)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
16772 
// c_div_32: channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
16783 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Same channel sweep as c_div_32 (64, 160, 256, 352, 448), with qmin(128).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16795 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Same channel sweep as c_div_32 (64, 160, 256, 352, 448), with qmax(128).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16807 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count below cr: 1 through 31.
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16818 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count strictly between cr and 2 * cr: 33 through 63.
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16829 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 33 through 63, with qmin(128).
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16841 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 33 through 63, with qmax(128).
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16853 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16865 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs: for each channel count in 1, 32, ..., 156, try every step from 2 to 9.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16880 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(5) runs with output_stride(163) over channel counts 1, 32, 63, 94, 125, 156.
  // The loop variable is forwarded to channels() so each iteration exercises a
  // different channel count; every swept count stays below the output stride of 163.
  // NOTE(review): this file is marked auto-generated -- mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
16893 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156, with qmin(128).
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16906 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156, with qmax(128).
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16919 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 64, 160, 256, 352, 448, each with input_offset(592).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16931 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For every zero_index in [0, 9), sweep channel counts 64, 160, 256, 352, 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16946 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16947 
16948 
16949 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single configuration: channel count equal to cr (32).
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(25).channels(32);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
}
16958 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Sweeps channel counts 64, 160, 256, 352, 448 (each a multiple of 32).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16969 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 64, 160, 256, 352, 448, with qmin(128).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16981 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 64, 160, 256, 352, 448, with qmax(128).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
16993 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count below cr: 1 through 31.
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17004 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every channel count strictly between cr and 2 * cr: 33 through 63.
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17015 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 33 through 63, with qmin(128).
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17027 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 33 through 63, with qmax(128).
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17039 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17051 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs: for each channel count in 1, 32, ..., 156, try every step from 2 to 25.
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17066 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(5) runs with output_stride(163) over channel counts 1, 32, 63, 94, 125, 156.
  // The loop variable is forwarded to channels() so each iteration exercises a
  // different channel count; every swept count stays below the output stride of 163.
  // NOTE(review): this file is marked auto-generated -- mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
17079 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156, with qmin(128).
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17092 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // width(3) runs over channel counts 1, 32, 63, 94, 125, 156, with qmax(128).
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17105 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Channel counts 64, 160, 256, 352, 448, each with input_offset(592).
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
  }
}
17117 
TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For every zero_index in [0, 25), sweep channel counts 64, 160, 256, 352, 448
  // with input_offset(592).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                  xnn_init_qs8_conv_minmax_fp32_avx512_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17132 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17133 
17134 
17135 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
  // Single configuration: channel count equal to cr (8).
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(9).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
}
17143 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
  // Sweeps channel counts 16, 40, 64, 88, 112 (each a multiple of 8).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17153 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112, with qmin(128).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17164 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112, with qmax(128).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17175 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
  // Every channel count below cr: 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17185 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
  // Every channel count strictly between cr and 2 * cr: 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17195 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15, with qmin(128).
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17206 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15, with qmax(128).
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17217 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17228 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
  // width(3) runs: for each channel count in 1, 8, ..., 36, try every step from 2 to 9.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17242 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  // width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36.
  // The loop variable is forwarded to channels() so each iteration exercises a
  // different channel count; every swept count stays below the output stride of 43.
  // NOTE(review): this file is marked auto-generated -- mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
17254 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36, with qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17266 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36, with qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17278 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
  // Channel counts 16, 40, 64, 88, 112, each with input_offset(176).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).input_offset(176);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17289 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
  // For every zero_index in [0, 9), sweep channel counts 16, 40, 64, 88, 112
  // with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17303 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
17304 
17305 
17306 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_eq_8) {
  // Single configuration: channel count equal to cr (8).
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(9).channels(8);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
}
17314 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8) {
  // Sweeps channel counts 16, 40, 64, 88, 112 (each a multiple of 8).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17324 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112, with qmin(128).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17335 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112, with qmax(128).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17346 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_lt_8) {
  // Every channel count below cr: 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17356 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8) {
  // Every channel count strictly between cr and 2 * cr: 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17366 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15, with qmin(128).
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17377 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15, with qmax(128).
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17388 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17399 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // width(3) runs: for each channel count in 1, 8, ..., 36, try every step from 2 to 9.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
17413 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29, 36.
  // The loop variable is forwarded to channels() so each iteration exercises a
  // different channel count; every swept count stays below the output stride of 43.
  // NOTE(review): this file is marked auto-generated -- mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
17425 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36, with qmin(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17437 
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // width(3) runs over channel counts 1, 8, 15, 22, 29, 36, with qmax(128).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
  }
}
17449 
// Runs the up8x9 wasmsimd_mul16_add16 QS8 dwconv microkernel with
// input_offset(176) over channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .input_offset(176)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17460 
// For each zero_index in 0..8 (one per kr(9) position) and channel counts
// 16, 40, 64, 88, 112, runs the up8x9 wasmsimd_mul16_add16 QS8 dwconv
// microkernel with input_offset(176).
// NOTE(review): zero_index presumably selects which input row is substituted
// by the zero buffer -- confirm against DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(9)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17474 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
17475 
17476 
17477 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case for the up8x25 wasmsimd_mul16 QS8 dwconv microkernel: the
// channel count equals the cr setting (8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
  constexpr uint32_t kChannels = 8;
  DWConvMicrokernelTester()
      .cr(kChannels)
      .kr(25)
      .channels(kChannels)
      .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
17485 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) for the
// up8x25 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17495 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) with qmin set
// to 128, for the up8x25 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17506 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) with qmax set
// to 128, for the up8x25 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17517 
// Sweeps channel counts 1..7, all below the cr(8) setting, for the up8x25
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17527 
// Sweeps channel counts 9..15, all above the cr(8) setting and below 16, for
// the up8x25 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17537 
// Sweeps channel counts 9..15 with qmin set to 128, for the up8x25
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17548 
// Sweeps channel counts 9..15 with qmax set to 128, for the up8x25
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17559 
// Multi-pixel run (width 3) of the up8x25 wasmsimd_mul16 QS8 dwconv
// microkernel over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17570 
// Multi-pixel run (width 3) of the up8x25 wasmsimd_mul16 QS8 dwconv
// microkernel for every step value 2..25, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(num_channels)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17584 
// Multi-pixel run (width 5) with an explicit output stride of 43 for the
// up8x25 wasmsimd_mul16 QS8 dwconv microkernel, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to .channels() so every iteration covers a
// different channel count; a fixed .channels(8) would repeat one identical
// case six times. The stride (43) exceeds the largest channel count (36).
// NOTE(review): this file is generated; the template used by
// tools/generate-dwconv-test.py presumably needs the same change -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17596 
// Multi-pixel run (width 3) with qmin set to 128 for the up8x25
// wasmsimd_mul16 QS8 dwconv microkernel, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17608 
// Multi-pixel run (width 3) with qmax set to 128 for the up8x25
// wasmsimd_mul16 QS8 dwconv microkernel, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17620 
// Runs the up8x25 wasmsimd_mul16 QS8 dwconv microkernel with
// input_offset(176) over channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17631 
// For each zero_index in 0..24 (one per kr(25) position) and channel counts
// 16, 40, 64, 88, 112, runs the up8x25 wasmsimd_mul16 QS8 dwconv microkernel
// with input_offset(176).
// NOTE(review): zero_index presumably selects which input row is substituted
// by the zero buffer -- confirm against DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17645 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
17646 
17647 
17648 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case for the up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel:
// the channel count equals the cr setting (8).
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_eq_8) {
  constexpr uint32_t kChannels = 8;
  DWConvMicrokernelTester()
      .cr(kChannels)
      .kr(25)
      .channels(kChannels)
      .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
17656 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) for the
// up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17666 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) with qmin set
// to 128, for the up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17677 
// Channel counts that are multiples of 8 (16, 40, 64, 88, 112) with qmax set
// to 128, for the up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17688 
// Sweeps channel counts 1..7, all below the cr(8) setting, for the up8x25
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_lt_8) {
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17698 
// Sweeps channel counts 9..15, all above the cr(8) setting and below 16, for
// the up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17708 
// Sweeps channel counts 9..15 with qmin set to 128, for the up8x25
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17719 
// Sweeps channel counts 9..15 with qmax set to 128, for the up8x25
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17730 
// Multi-pixel run (width 3) of the up8x25 wasmsimd_mul16_add16 QS8 dwconv
// microkernel over channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17741 
// Multi-pixel run (width 3) of the up8x25 wasmsimd_mul16_add16 QS8 dwconv
// microkernel for every step value 2..25, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(num_channels)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17755 
// Multi-pixel run (width 5) with an explicit output stride of 43 for the
// up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel, swept over channel
// counts 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to .channels() so every iteration covers a
// different channel count; a fixed .channels(8) would repeat one identical
// case six times. The stride (43) exceeds the largest channel count (36).
// NOTE(review): this file is generated; the template used by
// tools/generate-dwconv-test.py presumably needs the same change -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(5)
        .output_stride(43)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17767 
// Multi-pixel run (width 3) with qmin set to 128 for the up8x25
// wasmsimd_mul16_add16 QS8 dwconv microkernel, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17779 
// Multi-pixel run (width 3) with qmax set to 128 for the up8x25
// wasmsimd_mul16_add16 QS8 dwconv microkernel, over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17791 
// Runs the up8x25 wasmsimd_mul16_add16 QS8 dwconv microkernel with
// input_offset(176) over channel counts 16, 40, 64, 88, 112.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17802 
// For each zero_index in 0..24 (one per kr(25) position) and channel counts
// 16, 40, 64, 88, 112, runs the up8x25 wasmsimd_mul16_add16 QS8 dwconv
// microkernel with input_offset(176).
// NOTE(review): zero_index presumably selects which input row is substituted
// by the zero buffer -- confirm against DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8)
          .kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17816 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
17817 
17818 
17819 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case for the up16x9 wasmsimd_mul16 QS8 dwconv microkernel: the
// channel count equals the cr setting (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
  constexpr uint32_t kChannels = 16;
  DWConvMicrokernelTester()
      .cr(kChannels)
      .kr(9)
      .channels(kChannels)
      .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
17827 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) for the
// up16x9 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17837 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) with qmin
// set to 128, for the up16x9 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17848 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) with qmax
// set to 128, for the up16x9 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17859 
// Sweeps channel counts 1..15, all below the cr(16) setting, for the up16x9
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17869 
// Sweeps channel counts 17..31, all above the cr(16) setting and below 32,
// for the up16x9 wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17879 
// Sweeps channel counts 17..31 with qmin set to 128, for the up16x9
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17890 
// Sweeps channel counts 17..31 with qmax set to 128, for the up16x9
// wasmsimd_mul16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17901 
// Multi-pixel run (width 3) of the up16x9 wasmsimd_mul16 QS8 dwconv
// microkernel over channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17912 
// Multi-pixel run (width 3) of the up16x9 wasmsimd_mul16 QS8 dwconv
// microkernel for every step value 2..9, over channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(num_channels)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17926 
// Multi-pixel run (width 5) with an explicit output stride of 83 for the
// up16x9 wasmsimd_mul16 QS8 dwconv microkernel, swept over channel counts
// 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to .channels() so every iteration covers a
// different channel count; a fixed .channels(16) would repeat one identical
// case six times. The stride (83) exceeds the largest channel count (76).
// NOTE(review): this file is generated; the template used by
// tools/generate-dwconv-test.py presumably needs the same change -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17938 
// Multi-pixel run (width 3) with qmin set to 128 for the up16x9
// wasmsimd_mul16 QS8 dwconv microkernel, over channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17950 
// Multi-pixel run (width 3) with qmax set to 128 for the up16x9
// wasmsimd_mul16 QS8 dwconv microkernel, over channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17962 
// Runs the up16x9 wasmsimd_mul16 QS8 dwconv microkernel with
// input_offset(304) over channel counts 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
17973 
// For each zero_index in 0..8 (one per kr(9) position) and channel counts
// 32, 80, 128, 176, 224, runs the up16x9 wasmsimd_mul16 QS8 dwconv
// microkernel with input_offset(304).
// NOTE(review): zero_index presumably selects which input row is substituted
// by the zero buffer -- confirm against DWConvMicrokernelTester.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(num_channels)
          .input_offset(304)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17987 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
17988 
17989 
17990 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case for the up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel:
// the channel count equals the cr setting (16).
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_eq_16) {
  constexpr uint32_t kChannels = 16;
  DWConvMicrokernelTester()
      .cr(kChannels)
      .kr(9)
      .channels(kChannels)
      .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
17998 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) for the
// up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18008 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) with qmin
// set to 128, for the up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18019 
// Channel counts that are multiples of 16 (32, 80, 128, 176, 224) with qmax
// set to 128, for the up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18030 
// Sweeps channel counts 1..15, all below the cr(16) setting, for the up16x9
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18040 
// Sweeps channel counts 17..31, all above the cr(16) setting and below 32,
// for the up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18050 
// Sweeps channel counts 17..31 with qmin set to 128, for the up16x9
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmin(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18061 
// Sweeps channel counts 17..31 with qmax set to 128, for the up16x9
// wasmsimd_mul16_add16 QS8 dwconv microkernel.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .qmax(128)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18072 
// Multi-pixel run (width 3) of the up16x9 wasmsimd_mul16_add16 QS8 dwconv
// microkernel over channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18083 
// Multi-pixel run (width 3) of the up16x9 wasmsimd_mul16_add16 QS8 dwconv
// microkernel for every step value 2..9, over channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16)
          .kr(9)
          .channels(num_channels)
          .width(3)
          .step(step_size)
          .Test(
              xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18097 
// Multi-pixel run (width 5) with an explicit output stride of 83 for the
// up16x9 wasmsimd_mul16_add16 QS8 dwconv microkernel, swept over channel
// counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to .channels() so every iteration covers a
// different channel count; a fixed .channels(16) would repeat one identical
// case six times. The stride (83) exceeds the largest channel count (76).
// NOTE(review): this file is generated; the template used by
// tools/generate-dwconv-test.py presumably needs the same change -- confirm.
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(5)
        .output_stride(83)
        .Test(
            xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18109 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18121 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18133 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18144 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, zero) {
  // For each zero_index value 0 through 8, run channel counts
  // 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18158 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
18159 
18160 
18161 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
  // Single run with the channel count equal to the cr value (16).
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
18169 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 (all multiples of 16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18179 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224, with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18190 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224, with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18201 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
  // Every channel count from 1 through 15.
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18211 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
  // Every channel count from 17 through 31.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18221 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18232 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18243 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18254 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, each combined with
  // step values 2 through 25.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18268 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 5 with output_stride 83.
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(16), so all six iterations ran the same configuration.
  // The largest swept count (76) stays below the output stride (83).
  // NOTE(review): this file is generated; mirror the change in the generator
  // template so it survives regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
18280 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18292 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18304 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18315 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
  // For each zero_index value 0 through 24, run channel counts
  // 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18329 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
18330 
18331 
18332 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_eq_16) {
  // Single run with the channel count equal to the cr value (16).
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
18340 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 (all multiples of 16).
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18350 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224, with qmin set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18361 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224, with qmax set to 128.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18372 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_lt_16) {
  // Every channel count from 1 through 15.
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18382 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16) {
  // Every channel count from 17 through 31.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18392 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  // Every channel count from 17 through 31, with qmin set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18403 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  // Every channel count from 17 through 31, with qmax set to 128.
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18414 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18425 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, each combined with
  // step values 2 through 25.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18439 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 5 with output_stride 83.
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(16), so all six iterations ran the same configuration.
  // The largest swept count (76) stays below the output stride (83).
  // NOTE(review): this file is generated; mirror the change in the generator
  // template so it survives regeneration.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
18451 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18463 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // Channel counts 1, 16, 31, 46, 61, 76 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18475 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, input_offset) {
  // Channel counts 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18486 
TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, zero) {
  // For each zero_index value 0 through 24, run channel counts
  // 32, 80, 128, 176, 224 with input_offset set to 304.
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18500 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
18501 
18502 
18503 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
  // Single run with the channel count equal to the cr value (24).
  DWConvMicrokernelTester tester{};
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
18511 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
  // Channel counts 48, 120, 192, 264, 336 (all multiples of 24).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18521 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
  // Channel counts 48, 120, 192, 264, 336, with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18532 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
  // Channel counts 48, 120, 192, 264, 336, with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18543 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
  // Every channel count from 1 through 23.
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18553 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
  // Every channel count from 25 through 47.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18563 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  // Every channel count from 25 through 47, with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18574 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  // Every channel count from 25 through 47, with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18585 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
  // Channel counts 1, 24, 47, 70, 93, 116 at width 3.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18596 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
  // Channel counts 1, 24, 47, 70, 93, 116 at width 3, each combined with
  // step values 2 through 9.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 9; step_value++) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18610 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  // Channel counts 1, 24, 47, 70, 93, 116 at width 5 with output_stride 127.
  // The loop variable is now forwarded to channels(); previously the body
  // hard-coded channels(24), so all six iterations ran the same configuration.
  // The largest swept count (116) stays below the output stride (127).
  // NOTE(review): this file is generated; mirror the change in the generator
  // template so it survives regeneration.
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
18622 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  // Channel counts 1, 24, 47, 70, 93, 116 at width 3, with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18634 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  // Channel counts 1, 24, 47, 70, 93, 116 at width 3, with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18646 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
  // Channel counts 48, 120, 192, 264, 336 with input_offset set to 464.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18657 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
  // For each zero_index value 0 through 8, run channel counts
  // 48, 120, 192, 264, 336 with input_offset set to 464.
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester{};
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18671 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
18672 
18673 
18674 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_eq_24) {
  // Single run with the channel count equal to the cr value (24).
  DWConvMicrokernelTester tester{};
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
18682 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24) {
  // Channel counts 48, 120, 192, 264, 336 (all multiples of 24).
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18692 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  // Channel counts 48, 120, 192, 264, 336, with qmin set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18703 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  // Channel counts 48, 120, 192, 264, 336, with qmax set to 128.
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18714 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_lt_24) {
  // Every channel count from 1 through 23.
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18724 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24) {
  // Every channel count from 25 through 47.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18734 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  // Every channel count from 25 through 47, with qmin set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18745 
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  // Every channel count from 25 through 47, with qmax set to 128.
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    DWConvMicrokernelTester tester{};
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
18756 
// Runs with width(3) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18767 
// For channel counts 1, 24, ..., 116 and every step 2..9 (kr=9), runs the
// kernel with width(3) and that step.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18781 
// Runs with width(5) and output_stride(127) for channel counts
// 1, 24, 47, 70, 93, 116. The loop variable is forwarded to .channels() so
// the sweep is not six repeats of the channels(24) configuration; 127 is
// larger than the biggest channel count used here.
// NOTE: this file is auto-generated (see file header); the same change must
// be made in the generator template or it will be overwritten.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
18793 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18805 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18817 
// Runs with input_offset(464) for channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18828 
// For each zero_index 0..8 (one per kr=9 tap), repeats the input_offset(464)
// sweep over channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t z = 0; z < 9; ++z) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).zero_index(z)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18842 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
18843 
18844 
18845 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up24x25 wasmsimd_mul16 kernel with channels equal to cr=24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
18853 
// Runs for channel counts 48, 120, 192, 264, 336 (all multiples of cr=24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18863 
// Same multiples-of-24 sweep as c_div_24, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18874 
// Same multiples-of-24 sweep as c_div_24, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18885 
// Runs for every channel count 1..23 (all below cr=24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18895 
// Runs for every channel count 25..47 (above cr=24, below 2*cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18905 
// Same 25..47 channel sweep as c_gt_24, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18916 
// Same 25..47 channel sweep as c_gt_24, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18927 
// Runs with width(3) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18938 
// For channel counts 1, 24, ..., 116 and every step 2..25 (kr=25), runs the
// kernel with width(3) and that step.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18952 
// Runs with width(5) and output_stride(127) for channel counts
// 1, 24, 47, 70, 93, 116. The loop variable is forwarded to .channels() so
// the sweep is not six repeats of the channels(24) configuration; 127 is
// larger than the biggest channel count used here.
// NOTE: this file is auto-generated (see file header); the same change must
// be made in the generator template or it will be overwritten.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
18964 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18976 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18988 
// Runs with input_offset(464) for channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
18999 
// For each zero_index 0..24 (one per kr=25 tap), repeats the input_offset(464)
// sweep over channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(z)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19013 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19014 
19015 
19016 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up24x25 wasmsimd_mul16_add16 kernel with channels equal
// to cr=24.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_eq_24) {
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
19024 
// Runs for channel counts 48, 120, 192, 264, 336 (all multiples of cr=24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19034 
// Same multiples-of-24 sweep as c_div_24, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19045 
// Same multiples-of-24 sweep as c_div_24, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19056 
// Runs for every channel count 1..23 (all below cr=24).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_lt_24) {
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19066 
// Runs for every channel count 25..47 (above cr=24, below 2*cr).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19076 
// Same 25..47 channel sweep as c_gt_24, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19087 
// Same 25..47 channel sweep as c_gt_24, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19098 
// Runs with width(3) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19109 
// For channel counts 1, 24, ..., 116 and every step 2..25 (kr=25), runs the
// kernel with width(3) and that step.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19123 
// Runs with width(5) and output_stride(127) for channel counts
// 1, 24, 47, 70, 93, 116. The loop variable is forwarded to .channels() so
// the sweep is not six repeats of the channels(24) configuration; 127 is
// larger than the biggest channel count used here.
// NOTE: this file is auto-generated (see file header); the same change must
// be made in the generator template or it will be overwritten.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
19135 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19147 
// Same sweep as multipixel (width 3; channels 1, 24, ..., 116) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19159 
// Runs with input_offset(464) for channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
19170 
// For each zero_index 0..24 (one per kr=25 tap), repeats the input_offset(464)
// sweep over channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(z)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19184 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19185 
19186 
19187 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up1x9 wasm_fmagic kernel with channels equal to cr=1.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
19195 
// Runs for every channel count 2..9 (all above cr=1).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19205 
// Same 2..9 channel sweep as c_gt_1, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19216 
// Same 2..9 channel sweep as c_gt_1, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19227 
// Runs with width(3) for channel counts 1..5.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19238 
// For channel counts 1..5 and every step 2..9 (kr=9), runs the kernel with
// width(3) and that step.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19252 
// Runs with width(5) and output_stride(7) for channel counts 1..5. The loop
// variable is forwarded to .channels() so the sweep is not five repeats of
// the channels(1) configuration; 7 is larger than the biggest channel count
// used here.
// NOTE: this file is auto-generated (see file header); the same change must
// be made in the generator template or it will be overwritten.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
19264 
// Same sweep as multipixel (width 3; channels 1..5) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19276 
// Same sweep as multipixel (width 3; channels 1..5) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19288 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19299 
// For each zero_index 0..8 (one per kr=9 tap), repeats the input_offset(48)
// sweep over channel counts 2, 5, 8, 11, 14.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
  for (uint32_t z = 0; z < 9; ++z) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(z)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19313 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19314 
19315 
19316 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up1x25 wasm_fmagic kernel with channels equal to cr=1.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
19324 
// Runs for every channel count 2..9 (all above cr=1).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19334 
// Same 2..9 channel sweep as c_gt_1, with qmin(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19345 
// Same 2..9 channel sweep as c_gt_1, with qmax(128) set on the tester.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19356 
// Runs with width(3) for channel counts 1..5.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19367 
// For channel counts 1..5 and every step 2..25 (kr=25), runs the kernel with
// width(3) and that step.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19381 
// Runs with width(5) and output_stride(7) for channel counts 1..5. The loop
// variable is forwarded to .channels() so the sweep is not five repeats of
// the channels(1) configuration; 7 is larger than the biggest channel count
// used here.
// NOTE: this file is auto-generated (see file header); the same change must
// be made in the generator template or it will be overwritten.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
19393 
// Same sweep as multipixel (width 3; channels 1..5) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19405 
// Same sweep as multipixel (width 3; channels 1..5) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
19417 
// Runs channels = 2, 5, 8, 11, 14 with input_offset(48).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(num_channels)
      .input_offset(48)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19428 
// For each zero_index in 0..24, runs channels = 2, 5, 8, 11, 14 with input_offset(48).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
      DWConvMicrokernelTester()
        .cr(1)
        .kr(25)
        .channels(num_channels)
        .input_offset(48)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19442 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19443 
19444 
19445 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels(2), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester()
    .cr(2)
    .kr(9)
    .channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
19453 
// Runs channels = 4, 10, 16, 22, 28 (all multiples of 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19463 
// Runs channels = 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19474 
// Runs channels = 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19485 
// Runs every channel count below 2; the loop yields only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19495 
// Runs channel counts in [3, 4); the loop yields only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19505 
// Runs channel counts in [3, 4) (only channels = 3) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19516 
// Runs channel counts in [3, 4) (only channels = 3) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19527 
// Runs width(3) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19538 
// Runs width(3) for channels = 1..10 combined with every step value from 2 to 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
        .cr(2)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19552 
// Runs width(5) with output_stride(13) for every channel count in 1..10.
// The loop variable is forwarded to channels(); the earlier body passed a constant
// channels(2), so all ten iterations exercised the same configuration.
// The output stride (13) is larger than the biggest channel count in the loop (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; apply the
// same change to the generator template so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
19564 
// Runs width(3) with qmin(128) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19576 
// Runs width(3) with qmax(128) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19588 
// Runs channels = 4, 10, 16, 22, 28 with input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(num_channels)
      .input_offset(80)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19599 
// For each zero_index in 0..8, runs channels = 4, 10, 16, 22, 28 with input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester()
        .cr(2)
        .kr(9)
        .channels(num_channels)
        .input_offset(80)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19613 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19614 
19615 
19616 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels(2), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester()
    .cr(2)
    .kr(25)
    .channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
19624 
// Runs channels = 4, 10, 16, 22, 28 (all multiples of 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19634 
// Runs channels = 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19645 
// Runs channels = 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19656 
// Runs every channel count below 2; the loop yields only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19666 
// Runs channel counts in [3, 4); the loop yields only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19676 
// Runs channel counts in [3, 4) (only channels = 3) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19687 
// Runs channel counts in [3, 4) (only channels = 3) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19698 
// Runs width(3) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19709 
// Runs width(3) for channels = 1..10 combined with every step value from 2 to 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
        .cr(2)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19723 
// Runs width(5) with output_stride(13) for every channel count in 1..10.
// The loop variable is forwarded to channels(); the earlier body passed a constant
// channels(2), so all ten iterations exercised the same configuration.
// The output stride (13) is larger than the biggest channel count in the loop (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; apply the
// same change to the generator template so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
19735 
// Runs width(3) with qmin(128) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19747 
// Runs width(3) with qmax(128) for channels = 1..10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19759 
// Runs channels = 4, 10, 16, 22, 28 with input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(num_channels)
      .input_offset(80)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19770 
// For each zero_index in 0..24, runs channels = 4, 10, 16, 22, 28 with input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester()
        .cr(2)
        .kr(25)
        .channels(num_channels)
        .input_offset(80)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19784 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19785 
19786 
19787 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels(4), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester()
    .cr(4)
    .kr(9)
    .channels(4)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
19795 
// Runs channels = 8, 20, 32, 44, 56 (all multiples of 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19805 
// Runs channels = 8, 20, 32, 44, 56 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19816 
// Runs channels = 8, 20, 32, 44, 56 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19827 
// Runs every channel count below 4: channels = 1, 2, 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19837 
// Runs channel counts in [5, 8): channels = 5, 6, 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19847 
// Runs channels = 5, 6, 7 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19858 
// Runs channels = 5, 6, 7 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19869 
// Runs width(3) for channels = 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19880 
// Runs width(3) for channels = 1, 4, ..., 19 combined with every step value from 2 to 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
        .cr(4)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19894 
// Runs width(5) with output_stride(23) for channels = 1, 4, 7, 10, 13, 16, 19.
// The loop variable is forwarded to channels(); the earlier body passed a constant
// channels(4), so all seven iterations exercised the same configuration.
// The output stride (23) is larger than the biggest channel count in the loop (19).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; apply the
// same change to the generator template so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
19906 
// Runs width(3) with qmin(128) for channels = 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19918 
// Runs width(3) with qmax(128) for channels = 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19930 
// Runs channels = 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(num_channels)
      .input_offset(112)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19941 
// For each zero_index in 0..8, runs channels = 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester()
        .cr(4)
        .kr(9)
        .channels(num_channels)
        .input_offset(112)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19955 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19956 
19957 
19958 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels(4), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester()
    .cr(4)
    .kr(25)
    .channels(4)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
19966 
// Runs channels = 8, 20, 32, 44, 56 (all multiples of 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19976 
// Runs channels = 8, 20, 32, 44, 56 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19987 
// Runs channels = 8, 20, 32, 44, 56 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
19998 
// Runs every channel count below 4: channels = 1, 2, 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20008 
// Runs channel counts in [5, 8): channels = 5, 6, 7.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20018 
// Runs channels = 5, 6, 7 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20029 
// Runs channels = 5, 6, 7 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20040 
// Runs width(3) for channels = 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20051 
// Runs width(3) for channels = 1, 4, ..., 19 combined with every step value from 2 to 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
        .cr(4)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_size)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20065 
// Runs width(5) with output_stride(23) for channels = 1, 4, 7, 10, 13, 16, 19.
// The loop variable is forwarded to channels(); the earlier body passed a constant
// channels(4), so all seven iterations exercised the same configuration.
// The output stride (23) is larger than the biggest channel count in the loop (19).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; apply the
// same change to the generator template so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20077 
// Runs width(3) with qmin(128) for channels = 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20089 
// Width-3 runs with qmax(128) for channel counts 1, 4, 7, ..., 19 (cr=4, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 19; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20101 
// Runs with input_offset(112) for channel counts 8, 20, 32, 44, 56 (cr=4, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 8; c <= 56; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).input_offset(112)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20112 
// For each zero_index 0..24, runs with input_offset(112) over channel counts
// 8, 20, 32, 44, 56 (cr=4, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 8; c <= 56; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c).input_offset(112).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20126 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20127 
20128 
// Single configuration: cr=1, kr=9, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
20136 
// Channel counts 2 through 9 with cr=1, kr=9.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20146 
// Channel counts 2 through 9 with qmin(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20157 
// Channel counts 2 through 9 with qmax(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20168 
// Width-3 runs for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20179 
// Width-3 runs for channel counts 1 through 5 combined with every step value
// from 2 through 9 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20193 
// Width-5 runs with output_stride(7) for channel counts 1 through 5
// (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      // Feed the loop variable: a constant channel count would make every
      // iteration repeat one identical configuration.
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20205 
// Width-3 runs with qmin(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20217 
// Width-3 runs with qmax(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20229 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20240 
// For each zero_index 0..8, runs with input_offset(48) over channel counts
// 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 8; ++zero_idx) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20254 
// Single configuration: cr=1, kr=9, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
20262 
// Channel counts 2 through 9 with cr=1, kr=9.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20272 
// Channel counts 2 through 9 with qmin(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20283 
// Channel counts 2 through 9 with qmax(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20294 
// Width-3 runs for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20305 
// Width-3 runs for channel counts 1 through 5 combined with every step value
// from 2 through 9 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20319 
// Width-5 runs with output_stride(7) for channel counts 1 through 5
// (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      // Feed the loop variable: a constant channel count would make every
      // iteration repeat one identical configuration.
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20331 
// Width-3 runs with qmin(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20343 
// Width-3 runs with qmax(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20355 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20366 
// For each zero_index 0..8, runs with input_offset(48) over channel counts
// 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 8; ++zero_idx) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20380 
// Single configuration: cr=1, kr=9, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
20388 
// Channel counts 2 through 9 with cr=1, kr=9.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20398 
// Channel counts 2 through 9 with qmin(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20409 
// Channel counts 2 through 9 with qmax(128) (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20420 
// Width-3 runs for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20431 
// Width-3 runs for channel counts 1 through 5 combined with every step value
// from 2 through 9 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
20445 
// Width-5 runs with output_stride(7) for channel counts 1 through 5
// (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      // Feed the loop variable: a constant channel count would make every
      // iteration repeat one identical configuration.
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20457 
// Width-3 runs with qmin(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20469 
// Width-3 runs with qmax(128) for channel counts 1 through 5 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20481 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20492 
// For each zero_index 0..8, runs with input_offset(48) over channel counts
// 2, 5, 8, 11, 14 (cr=1, kr=9).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 8; ++zero_idx) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
20506 
// Single configuration: cr=1, kr=25, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
20514 
// Channel counts 2 through 9 with cr=1, kr=25.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20524 
// Channel counts 2 through 9 with qmin(128) (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20535 
// Channel counts 2 through 9 with qmax(128) (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20546 
// Width-3 runs for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20557 
// Width-3 runs for channel counts 1 through 5 combined with every step value
// from 2 through 25 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20571 
// Width-5 runs with output_stride(7) for channel counts 1 through 5
// (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      // Feed the loop variable: a constant channel count would make every
      // iteration repeat one identical configuration.
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20583 
// Width-3 runs with qmin(128) for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20595 
// Width-3 runs with qmax(128) for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20607 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
20618 
// For each zero_index 0..24, runs with input_offset(48) over channel counts
// 2, 5, 8, 11, 14 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20632 
// Single configuration: cr=1, kr=25, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
20640 
// Channel counts 2 through 9 with cr=1, kr=25.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20650 
// Channel counts 2 through 9 with qmin(128) (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20661 
// Channel counts 2 through 9 with qmax(128) (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20672 
// Width-3 runs for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20683 
// Width-3 runs for channel counts 1 through 5 combined with every step value
// from 2 through 25 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20697 
// Width-5 runs with output_stride(7) for channel counts 1 through 5
// (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      // Feed the loop variable: a constant channel count would make every
      // iteration repeat one identical configuration.
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20709 
// Width-3 runs with qmin(128) for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20721 
// Width-3 runs with qmax(128) for channel counts 1 through 5 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20733 
// Runs with input_offset(48) for channel counts 2, 5, 8, 11, 14 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
20744 
// For each zero_index 0..24, runs with input_offset(48) over channel counts
// 2, 5, 8, 11, 14 (cr=1, kr=25).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx <= 24; ++zero_idx) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
20758 
// Single configuration: cr=1, kr=25, one channel.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
20766 
// Channel counts 2 through 9 with cr=1, kr=25.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20776 
// up1x25 scalar_lrintf kernel: channel counts 2 through 9 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20787 
// up1x25 scalar_lrintf kernel: channel counts 2 through 9 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20798 
// up1x25 scalar_lrintf kernel: width(3) runs for channel counts 1 through 5.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20809 
// up1x25 scalar_lrintf kernel: width(3) runs for channel counts 1 through 5,
// each combined with every step value 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20823 
// up1x25 scalar_lrintf kernel: width(5) runs with output_stride(7), swept over
// channel counts 1 through 5. The loop's channel count is forwarded to
// .channels() so that each iteration exercises a distinct configuration
// (a hard-coded .channels(1) would repeat the same run five times).
// The stride of 7 is larger than every channel count in the sweep.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
20835 
// up1x25 scalar_lrintf kernel: width(3) runs with qmin(128) for channel counts 1 through 5.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20847 
// up1x25 scalar_lrintf kernel: width(3) runs with qmax(128) for channel counts 1 through 5.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20859 
// up1x25 scalar_lrintf kernel with input_offset(48), for channels 2, 5, 8, 11, 14.
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).input_offset(48)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
20870 
// up1x25 scalar_lrintf kernel: every zero_index 0..24 combined with
// channels 2, 5, 8, 11, 14 and input_offset(48).
TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 2; num_channels < 16; num_channels += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(num_channels).input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20884 
// up2x9 scalar_fmagic kernel: single run with channels equal to cr (2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
20892 
// up2x9 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 (multiples of cr = 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20902 
// up2x9 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20913 
// up2x9 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20924 
// up2x9 scalar_fmagic kernel: channel counts below cr (2); the loop visits only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20934 
// up2x9 scalar_fmagic kernel: channel counts in [3, 4); the loop visits only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20944 
// up2x9 scalar_fmagic kernel: channels = 3 (the only value in [3, 4)) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20955 
// up2x9 scalar_fmagic kernel: channels = 3 (the only value in [3, 4)) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20966 
// up2x9 scalar_fmagic kernel: width(3) runs for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
20977 
// up2x9 scalar_fmagic kernel: width(3) runs for channel counts 1 through 10,
// each combined with every step value 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20991 
// up2x9 scalar_fmagic kernel: width(5) runs with output_stride(13), swept over
// channel counts 1 through 10. The loop's channel count is forwarded to
// .channels() so that each iteration exercises a distinct configuration
// (a hard-coded .channels(2) would repeat the same run ten times).
// The stride of 13 is larger than every channel count in the sweep.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
21003 
// up2x9 scalar_fmagic kernel: width(3) runs with qmin(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21015 
// up2x9 scalar_fmagic kernel: width(3) runs with qmax(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21027 
// up2x9 scalar_fmagic kernel with input_offset(80), for channels 4, 10, 16, 22, 28.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21038 
// up2x9 scalar_fmagic kernel: every zero_index 0..8 combined with
// channels 4, 10, 16, 22, 28 and input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21052 
// up2x9 scalar_imagic kernel: single run with channels equal to cr (2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
21060 
// up2x9 scalar_imagic kernel: channel counts 4, 10, 16, 22, 28 (multiples of cr = 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21070 
// up2x9 scalar_imagic kernel: channel counts 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21081 
// up2x9 scalar_imagic kernel: channel counts 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21092 
// up2x9 scalar_imagic kernel: channel counts below cr (2); the loop visits only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21102 
// up2x9 scalar_imagic kernel: channel counts in [3, 4); the loop visits only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21112 
// up2x9 scalar_imagic kernel: channels = 3 (the only value in [3, 4)) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21123 
// up2x9 scalar_imagic kernel: channels = 3 (the only value in [3, 4)) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21134 
// up2x9 scalar_imagic kernel: width(3) runs for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21145 
// up2x9 scalar_imagic kernel: width(3) runs for channel counts 1 through 10,
// each combined with every step value 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21159 
// up2x9 scalar_imagic kernel: width(5) runs with output_stride(13), swept over
// channel counts 1 through 10. The loop's channel count is forwarded to
// .channels() so that each iteration exercises a distinct configuration
// (a hard-coded .channels(2) would repeat the same run ten times).
// The stride of 13 is larger than every channel count in the sweep.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
21171 
// up2x9 scalar_imagic kernel: width(3) runs with qmin(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21183 
// up2x9 scalar_imagic kernel: width(3) runs with qmax(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21195 
// up2x9 scalar_imagic kernel with input_offset(80), for channels 4, 10, 16, 22, 28.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21206 
// up2x9 scalar_imagic kernel: every zero_index 0..8 combined with
// channels 4, 10, 16, 22, 28 and input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21220 
// up2x9 scalar_lrintf kernel: single run with channels equal to cr (2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
21228 
// up2x9 scalar_lrintf kernel: channel counts 4, 10, 16, 22, 28 (multiples of cr = 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21238 
// up2x9 scalar_lrintf kernel: channel counts 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21249 
// up2x9 scalar_lrintf kernel: channel counts 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21260 
// up2x9 scalar_lrintf kernel: channel counts below cr (2); the loop visits only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21270 
// up2x9 scalar_lrintf kernel: channel counts in [3, 4); the loop visits only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21280 
// up2x9 scalar_lrintf kernel: channels = 3 (the only value in [3, 4)) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21291 
// up2x9 scalar_lrintf kernel: channels = 3 (the only value in [3, 4)) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21302 
// up2x9 scalar_lrintf kernel: width(3) runs for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21313 
// up2x9 scalar_lrintf kernel: width(3) runs for channel counts 1 through 10,
// each combined with every step value 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21327 
// up2x9 scalar_lrintf kernel: width(5) runs with output_stride(13), swept over
// channel counts 1 through 10. The loop's channel count is forwarded to
// .channels() so that each iteration exercises a distinct configuration
// (a hard-coded .channels(2) would repeat the same run ten times).
// The stride of 13 is larger than every channel count in the sweep.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
21339 
// up2x9 scalar_lrintf kernel: width(3) runs with qmin(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21351 
// up2x9 scalar_lrintf kernel: width(3) runs with qmax(128) for channel counts 1 through 10.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21363 
// up2x9 scalar_lrintf kernel with input_offset(80), for channels 4, 10, 16, 22, 28.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
21374 
// up2x9 scalar_lrintf kernel: every zero_index 0..8 combined with
// channels 4, 10, 16, 22, 28 and input_offset(80).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(num_channels).input_offset(80).zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21388 
// up2x25 scalar_fmagic kernel: single run with channels equal to cr (2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2)
    .Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
21396 
// up2x25 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 (multiples of cr = 2).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21406 
// up2x25 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21417 
// up2x25 scalar_fmagic kernel: channel counts 4, 10, 16, 22, 28 with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21428 
// up2x25 scalar_fmagic kernel: channel counts below cr (2); the loop visits only channels = 1.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21438 
// up2x25 scalar_fmagic kernel: channel counts in [3, 4); the loop visits only channels = 3.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21448 
// up2x25 scalar_fmagic kernel: channels = 3 (the only value in [3, 4)) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21459 
// up2x25 scalar_fmagic kernel: channels = 3 (the only value in [3, 4)) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
21470 
// Width 3 for every channel count from 1 through 10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21481 
// Width 3, sweeping step over 2..25 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21495 
// Width 5 with output_stride 13, swept over channel counts 1..10 (cr = 2, kr = 25).
// Each iteration passes its own channel count to .channels(); a fixed
// .channels(2) here would repeat one identical configuration ten times and
// leave the loop variable unused. The largest count (10) stays below the
// output stride (13).
// NOTE(review): the file header marks this file as auto-generated
// (tools/generate-dwconv-test.py), so the same change belongs in the generator.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
21507 
// Width 3 with qmin set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21519 
// Width 3 with qmax set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21531 
// input_offset set to 80 for channel counts 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21542 
// zero_index swept over 0..24 with input_offset 80, for channel counts
// 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21556 
// Single case: channels equal to cr = 2, with kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
  auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(2);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
21564 
// Channel counts 4, 10, 16, 22, 28 (all multiples of cr = 2); kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21574 
// Channel counts 4, 10, 16, 22, 28 with qmin set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21585 
// Channel counts 4, 10, 16, 22, 28 with qmax set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21596 
// Channel counts below cr = 2 (only 1) with cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21606 
// Channel counts above cr = 2 and below 4 (only 3) with cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21616 
// Channel count 3 (loop 3..<4) with qmin set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21627 
// Channel count 3 (loop 3..<4) with qmax set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21638 
// Width 3 for every channel count from 1 through 10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21649 
// Width 3, sweeping step over 2..25 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21663 
// Width 5 with output_stride 13, swept over channel counts 1..10 (cr = 2, kr = 25).
// Each iteration passes its own channel count to .channels(); a fixed
// .channels(2) here would repeat one identical configuration ten times and
// leave the loop variable unused. The largest count (10) stays below the
// output stride (13).
// NOTE(review): the file header marks this file as auto-generated
// (tools/generate-dwconv-test.py), so the same change belongs in the generator.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
21675 
// Width 3 with qmin set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21687 
// Width 3 with qmax set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21699 
// input_offset set to 80 for channel counts 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21710 
// zero_index swept over 0..24 with input_offset 80, for channel counts
// 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21724 
// Single case: channels equal to cr = 2, with kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
  auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(2);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
}
21732 
// Channel counts 4, 10, 16, 22, 28 (all multiples of cr = 2); kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21742 
// Channel counts 4, 10, 16, 22, 28 with qmin set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21753 
// Channel counts 4, 10, 16, 22, 28 with qmax set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21764 
// Channel counts below cr = 2 (only 1) with cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t num_channels = 1; num_channels < 2; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21774 
// Channel counts above cr = 2 and below 4 (only 3) with cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21784 
// Channel count 3 (loop 3..<4) with qmin set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21795 
// Channel count 3 (loop 3..<4) with qmax set to 128; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t num_channels = 3; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21806 
// Width 3 for every channel count from 1 through 10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21817 
// Width 3, sweeping step over 2..25 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21831 
// Width 5 with output_stride 13, swept over channel counts 1..10 (cr = 2, kr = 25).
// Each iteration passes its own channel count to .channels(); a fixed
// .channels(2) here would repeat one identical configuration ten times and
// leave the loop variable unused. The largest count (10) stays below the
// output stride (13).
// NOTE(review): the file header marks this file as auto-generated
// (tools/generate-dwconv-test.py), so the same change belongs in the generator.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
21843 
// Width 3 with qmin set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21855 
// Width 3 with qmax set to 128 for every channel count 1..10; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 10; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21867 
// input_offset set to 80 for channel counts 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
  }
}
21878 
// zero_index swept over 0..24 with input_offset 80, for channel counts
// 4, 10, 16, 22, 28; cr = 2, kr = 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 4; num_channels < 32; num_channels += 6) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(25).channels(num_channels).input_offset(80).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21892 
// Single case: channels equal to cr = 4, with kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(4);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
21900 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4); kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21910 
// Channel counts 8, 20, 32, 44, 56 with qmin set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21921 
// Channel counts 8, 20, 32, 44, 56 with qmax set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21932 
// Channel counts below cr = 4 (1, 2, 3) with cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21942 
// Channel counts above cr = 4 and below 8 (5, 6, 7) with cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21952 
// Channel counts 5, 6, 7 with qmin set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21963 
// Channel counts 5, 6, 7 with qmax set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21974 
// Width 3 for channel counts 1, 4, 7, ..., 19 (step 3 up to 20); cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
21985 
// Width 3, sweeping step over 2..9 for channel counts 1, 4, 7, ..., 19; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
21999 
// Width 5 with output_stride 23, swept over channel counts 1, 4, 7, ..., 19
// (cr = 4, kr = 9). Each iteration passes its own channel count to
// .channels(); a fixed .channels(4) here would repeat one identical
// configuration seven times and leave the loop variable unused. The largest
// count (19) stays below the output stride (23).
// NOTE(review): the file header marks this file as auto-generated
// (tools/generate-dwconv-test.py), so the same change belongs in the generator.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
22011 
// Width 3 with qmin set to 128 for channel counts 1, 4, 7, ..., 19; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22023 
// Width 3 with qmax set to 128 for channel counts 1, 4, 7, ..., 19; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22035 
// input_offset set to 112 for channel counts 8, 20, 32, 44, 56; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).input_offset(112);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22046 
// zero_index swept over 0..8 with input_offset 112, for channel counts
// 8, 20, 32, 44, 56; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).input_offset(112).zero_index(zero_idx);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
22060 
// Single case: channels equal to cr = 4, with kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(4);
  tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
22068 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4); kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22078 
// Channel counts 8, 20, 32, 44, 56 with qmin set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22089 
// Channel counts 8, 20, 32, 44, 56 with qmax set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22100 
// Channel counts below cr = 4 (1, 2, 3) with cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22110 
// Channel counts above cr = 4 and below 8 (5, 6, 7) with cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22120 
// Channel counts 5, 6, 7 with qmin set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22131 
// Channel counts 5, 6, 7 with qmax set to 128; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22142 
// Width 3 for channel counts 1, 4, 7, ..., 19 (step 3 up to 20); cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
22153 
// Width 3, sweeping step over 2..9 for channel counts 1, 4, 7, ..., 19; cr = 4, kr = 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      auto tester = DWConvMicrokernelTester().cr(4).kr(9).channels(num_channels).width(3).step(step_value);
      tester.Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
22167 
// width(5) runs with output_stride(23) over channel counts 1, 4, ..., 19.
// The swept count is passed to channels() so each iteration exercises a
// different configuration; a hard-coded channels(4) would leave the loop
// variable unused and repeat a single case seven times.
// output_stride(23) is larger than the biggest swept channel count (19).
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
22179 
// width(3) runs over channel counts 1, 4, ..., 19 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22191 
// width(3) runs over channel counts 1, 4, ..., 19 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22203 
// Channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).input_offset(112);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22214 
// For every zero_index in 0..8 (one per kernel tap, kr = 9), runs channel
// counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(9).channels(channel_count).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22228 
// Runs the kernel with the channel count equal to cr (4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
  DWConvMicrokernelTester tester{};
  tester.cr(4).kr(9).channels(4);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
22236 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22246 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22257 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22268 
// Channel counts 1, 2, 3 (below cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22278 
// Channel counts 5, 6, 7 (just above cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22288 
// Channel counts 5, 6, 7 (just above cr = 4) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22299 
// Channel counts 5, 6, 7 (just above cr = 4) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22310 
// width(3) runs over channel counts 1, 4, 7, ..., 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22321 
// width(3) runs over channel counts 1, 4, ..., 19, each combined with every
// step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(9).channels(channel_count).width(3).step(step_size);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22335 
// width(5) runs with output_stride(23) over channel counts 1, 4, ..., 19.
// The swept count is passed to channels() so each iteration exercises a
// different configuration; a hard-coded channels(4) would leave the loop
// variable unused and repeat a single case seven times.
// output_stride(23) is larger than the biggest swept channel count (19).
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
22347 
// width(3) runs over channel counts 1, 4, ..., 19 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22359 
// width(3) runs over channel counts 1, 4, ..., 19 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22371 
// Channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(9).channels(channel_count).input_offset(112);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22382 
// For every zero_index in 0..8 (one per kernel tap, kr = 9), runs channel
// counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(9).channels(channel_count).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22396 
// Runs the kernel with the channel count equal to cr (4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester{};
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
22404 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22414 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22425 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22436 
// Channel counts 1, 2, 3 (below cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22446 
// Channel counts 5, 6, 7 (just above cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22456 
// Channel counts 5, 6, 7 (just above cr = 4) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22467 
// Channel counts 5, 6, 7 (just above cr = 4) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22478 
// width(3) runs over channel counts 1, 4, 7, ..., 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22489 
// width(3) runs over channel counts 1, 4, ..., 19, each combined with every
// step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(25).channels(channel_count).width(3).step(step_size);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22503 
// width(5) runs with output_stride(23) over channel counts 1, 4, ..., 19.
// The swept count is passed to channels() so each iteration exercises a
// different configuration; a hard-coded channels(4) would leave the loop
// variable unused and repeat a single case seven times.
// output_stride(23) is larger than the biggest swept channel count (19).
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
22515 
// width(3) runs over channel counts 1, 4, ..., 19 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22527 
// width(3) runs over channel counts 1, 4, ..., 19 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22539 
// Channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).input_offset(112);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22550 
// For every zero_index in 0..24 (one per kernel tap, kr = 25), runs channel
// counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(25).channels(channel_count).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22564 
// Runs the kernel with the channel count equal to cr (4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester{};
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
22572 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22582 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22593 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22604 
// Channel counts 1, 2, 3 (below cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22614 
// Channel counts 5, 6, 7 (just above cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22624 
// Channel counts 5, 6, 7 (just above cr = 4) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22635 
// Channel counts 5, 6, 7 (just above cr = 4) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22646 
// width(3) runs over channel counts 1, 4, 7, ..., 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22657 
// width(3) runs over channel counts 1, 4, ..., 19, each combined with every
// step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(25).channels(channel_count).width(3).step(step_size);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22671 
// width(5) runs with output_stride(23) over channel counts 1, 4, ..., 19.
// The swept count is passed to channels() so each iteration exercises a
// different configuration; a hard-coded channels(4) would leave the loop
// variable unused and repeat a single case seven times.
// output_stride(23) is larger than the biggest swept channel count (19).
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
22683 
// width(3) runs over channel counts 1, 4, ..., 19 with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22695 
// width(3) runs over channel counts 1, 4, ..., 19 with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22707 
// Channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).input_offset(112);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
22718 
// For every zero_index in 0..24 (one per kernel tap, kr = 25), runs channel
// counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(25).channels(channel_count).input_offset(112).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22732 
// Runs the kernel with the channel count equal to cr (4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
  DWConvMicrokernelTester tester{};
  tester.cr(4).kr(25).channels(4);
  tester.Test(
    xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
22740 
// Channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22750 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmin(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22761 
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with qmax(128).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22772 
// Channel counts 1, 2, 3 (below cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22782 
// Channel counts 5, 6, 7 (just above cr = 4).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22792 
// Channel counts 5, 6, 7 (just above cr = 4) with qmin(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22803 
// Channel counts 5, 6, 7 (just above cr = 4) with qmax(128) applied.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22814 
// width(3) runs over channel counts 1, 4, 7, ..., 19.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    DWConvMicrokernelTester tester{};
    tester.cr(4).kr(25).channels(channel_count).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
22825 
// width(3) runs over channel counts 1, 4, ..., 19, each combined with every
// step value from 2 through 25.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester{};
      tester.cr(4).kr(25).channels(channel_count).width(3).step(step_size);
      tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22839 
// width(5) runs with output_stride(23) over channel counts 1, 4, ..., 19.
// The swept count is passed to channels() so each iteration exercises a
// different configuration; a hard-coded channels(4) would leave the loop
// variable unused and repeat a single case seven times.
// output_stride(23) is larger than the biggest swept channel count (19).
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); mirror this change in the generator template.
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
22851 
// Exercises the up4x25 scalar-lrintf microkernel with qmin set to 128 and
// width set to 3 for channel counts 1, 4, 7, ..., 19 (cr = 4, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
  size_t num_channels = 1;
  while (num_channels <= 20) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    num_channels += 3;
  }
}
22863 
// Exercises the up4x25 scalar-lrintf microkernel with qmax set to 128 and
// width set to 3 for channel counts 1, 4, 7, ..., 19 (cr = 4, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
  size_t num_channels = 1;
  while (num_channels <= 20) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    num_channels += 3;
  }
}
22875 
// Exercises the up4x25 scalar-lrintf microkernel with input_offset set to 112
// for channel counts 8, 20, 32, 44, 56 (cr = 4, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
  uint32_t num_channels = 8;
  while (num_channels < 64) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(num_channels).input_offset(112);
    tester.Test(
        xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    num_channels += 12;
  }
}
22886 
// Sweeps zero_index over 0..24 and, for each value, runs the up4x25
// scalar-lrintf microkernel with input_offset set to 112 for channel counts
// 8, 20, 32, 44, 56 (cr = 4, kr = 25).
TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    uint32_t num_channels = 8;
    while (num_channels < 64) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(num_channels).input_offset(112).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      num_channels += 12;
    }
  }
}