1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-dwconv-minmax-fp32.yaml
11 // Generator: tools/generate-dwconv-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21
22
23 #if XNN_ARCH_ARM
// Single run with channels equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
32
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
43
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
55
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
67
// Covers every channel count below the cr value: 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
78
// Covers channel counts 9 through 15, i.e. strictly between one and two times the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
89
// Channel counts 9 through 15, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
101
// Channel counts 9 through 15, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
113
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
125
// Width-3 runs over channel counts 1, 8, ..., 36, each combined with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
140
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36 (all below 43). The loop variable is forwarded to
// channels() so every iteration exercises a different channel count instead
// of repeating the channels == 8 configuration.
// NOTE(review): presumably the tester requires output_stride >= channels -- confirm.
// NOTE(review): the file header marks this file as generated; mirror this
// change in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
153
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
166
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
179
// Channel counts 16, 40, ..., 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
191
// For each zero_index in [0, 3) -- one per kr position -- sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__AARCH32_NEONV8_MLA8_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
206 #endif // XNN_ARCH_ARM
207
208
209 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
218
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
229
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
241
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
253
// Covers every channel count below the cr value: 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
264
// Covers channel counts 9 through 15, i.e. strictly between one and two times the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
275
// Channel counts 9 through 15, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
287
// Channel counts 9 through 15, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
299
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
311
// Width-3 runs over channel counts 1, 8, ..., 36, each combined with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
326
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36 (all below 43). The loop variable is forwarded to
// channels() so every iteration exercises a different channel count instead
// of repeating the channels == 8 configuration.
// NOTE(review): presumably the tester requires output_stride >= channels -- confirm.
// NOTE(review): the file header marks this file as generated; mirror this
// change in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
339
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
352
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
365
// Channel counts 16, 40, ..., 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
377
// For each zero_index in [0, 3) -- one per kr position -- sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
392 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
393
394
395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
404
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
415
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
427
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
439
// Covers every channel count below the cr value: 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
450
// Covers channel counts 9 through 15, i.e. strictly between one and two times the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
461
// Channel counts 9 through 15, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
473
// Channel counts 9 through 15, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
485
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
497
// Width-3 runs over channel counts 1, 8, ..., 36, each combined with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
512
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36 (all below 43). The loop variable is forwarded to
// channels() so every iteration exercises a different channel count instead
// of repeating the channels == 8 configuration.
// NOTE(review): presumably the tester requires output_stride >= channels -- confirm.
// NOTE(review): the file header marks this file as generated; mirror this
// change in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
525
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
538
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
551
// Channel counts 16, 40, ..., 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
563
// For each zero_index in [0, 3) -- one per kr position -- sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
578 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
579
580
581 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
590
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
601
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
613
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
625
// Covers every channel count below the cr value: 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
636
// Covers channel counts 9 through 15, i.e. strictly between one and two times the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
647
// Channel counts 9 through 15, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
659
// Channel counts 9 through 15, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
671
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
683
// Width-3 runs over channel counts 1, 8, ..., 36, each combined with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
698
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36 (all below 43). The loop variable is forwarded to
// channels() so every iteration exercises a different channel count instead
// of repeating the channels == 8 configuration.
// NOTE(review): presumably the tester requires output_stride >= channels -- confirm.
// NOTE(review): the file header marks this file as generated; mirror this
// change in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
711
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
724
// Width-3 runs over channel counts 1, 8, ..., 36, with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
737
// Channel counts 16, 40, ..., 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
749
// For each zero_index in [0, 9) -- one per kr position -- sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
764 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
776
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of the cr value (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
787
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
799
// Same channel sweep as c_div_8 (16, 40, ..., 112), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
811
// Sweeps channels over every count from 1 through 7 (all below 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
822
// Sweeps channels over every count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
833
// Sweeps channels over 9 through 15 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
845
// Sweeps channels over 9 through 15 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
857
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
869
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36, trying every
// step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
884
// Runs with width(5) and output_stride(43) for channels 1, 8, 15, 22, 29 and 36.
// The loop variable used to be ignored (channels was pinned to 8), so the same
// configuration simply ran six times; it is now forwarded to channels().
// Every swept channel count (max 36) stays below the output stride of 43.
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the
// change in the generator so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
897
// Runs with width(3) and qmin(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
910
// Runs with width(3) and qmax(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
923
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
935
// For every zero_index from 0 through 8, sweeps channels over 16, 40, 64, 88
// and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
950 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
951
952
953 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
962
// Sweeps channels over 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
973
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
985
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
997
// Sweeps channels over every count from 1 through 7 (all below 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1008
// Sweeps channels over every count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1019
// Sweeps channels over 9 through 15 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1031
// Sweeps channels over 9 through 15 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1043
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1055
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36, trying every
// step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1070
// Runs with width(5) and output_stride(43) for channels 1, 8, 15, 22, 29 and 36.
// The loop variable used to be ignored (channels was pinned to 8), so the same
// configuration simply ran six times; it is now forwarded to channels().
// Every swept channel count (max 36) stays below the output stride of 43.
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the
// change in the generator so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1083
// Runs with width(3) and qmin(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1096
// Runs with width(3) and qmax(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1109
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1121
// For every zero_index from 0 through 8, sweeps channels over 16, 40, 64, 88
// and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1136 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137
1138
1139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1148
// Sweeps channels over 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1159
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1171
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1183
// Sweeps channels over every count from 1 through 7 (all below 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1194
// Sweeps channels over every count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1205
// Sweeps channels over 9 through 15 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1217
// Sweeps channels over 9 through 15 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1229
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1241
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36, trying every
// step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1256
// Runs with width(5) and output_stride(43) for channels 1, 8, 15, 22, 29 and 36.
// The loop variable used to be ignored (channels was pinned to 8), so the same
// configuration simply ran six times; it is now forwarded to channels().
// Every swept channel count (max 36) stays below the output stride of 43.
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the
// change in the generator so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1269
// Runs with width(3) and qmin(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1282
// Runs with width(3) and qmax(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1295
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1307
// For every zero_index from 0 through 8, sweeps channels over 16, 40, 64, 88
// and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1322 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323
1324
1325 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1334
// Sweeps channels over 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1345
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1357
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1369
// Sweeps channels over every count from 1 through 7 (all below 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1380
// Sweeps channels over every count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1391
// Sweeps channels over 9 through 15 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1403
// Sweeps channels over 9 through 15 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1415
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1427
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36, trying every
// step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1442
// Runs with width(5) and output_stride(43) for channels 1, 8, 15, 22, 29 and 36.
// The loop variable used to be ignored (channels was pinned to 8), so the same
// configuration simply ran six times; it is now forwarded to channels().
// Every swept channel count (max 36) stays below the output stride of 43.
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the
// change in the generator so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1455
// Runs with width(3) and qmin(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1468
// Runs with width(3) and qmax(128) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1481
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1493
// For every zero_index from 0 through 8, sweeps channels over 16, 40, 64, 88
// and 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1508 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509
1510
1511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr(8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1520
// Sweeps channels over 16, 40, 64, 88 and 112 (each a multiple of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1531
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1543
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1555
// Sweeps channels over every count from 1 through 7 (all below 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1566
// Sweeps channels over every count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1577
// Sweeps channels over 9 through 15 with qmin(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1589
// Sweeps channels over 9 through 15 with qmax(128) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1601
// Runs with width(3) for channels 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1613
// Width-3 runs over channel counts 1, 8, ..., 36, repeated for every step
// value from 2 through 9 (the kr setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 9; step_value++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(9).channels(c).width(3).step(step_value)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1628
// Width-5 runs with an output stride of 43 over channel counts 1, 8, 15, 22,
// 29 and 36. The loop counter is forwarded to channels() so that each
// iteration exercises a distinct channel count; passing a fixed value would
// just repeat one configuration. All swept counts stay below the stride of 43.
// NOTE: this file is auto-generated; mirror this change in the generator
// template, otherwise it is lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1641
// Width-3 runs over channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1654
// Width-3 runs over channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1667
// Channel counts 16, 40, 64, 88 and 112, each run with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1679
// For every zero_index from 0 through 8, sweeps channel counts 16, 40, 64, 88
// and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1694 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1695
1696
1697 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
1706
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1717
// Channel counts 16, 40, 64, 88 and 112, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1729
// Channel counts 16, 40, 64, 88 and 112, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1741
// Runs the kernel once for every channel count from 1 through 7, i.e. all
// counts below the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1752
// Runs the kernel once for every channel count from 9 through 15, i.e. all
// counts strictly between the cr setting (8) and twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1763
// Channel counts 9 through 15, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1775
// Channel counts 9 through 15, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1787
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1799
// Width-3 runs over channel counts 1, 8, ..., 36, repeated for every step
// value from 2 through 25 (the kr setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1814
// Width-5 runs with an output stride of 43 over channel counts 1, 8, 15, 22,
// 29 and 36. The loop counter is forwarded to channels() so that each
// iteration exercises a distinct channel count; passing a fixed value would
// just repeat one configuration. All swept counts stay below the stride of 43.
// NOTE: this file is auto-generated; mirror this change in the generator
// template, otherwise it is lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1827
// Width-3 runs over channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1840
// Width-3 runs over channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1853
// Channel counts 16, 40, 64, 88 and 112, each run with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1865
// For every zero_index from 0 through 24, sweeps channel counts 16, 40, 64, 88
// and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1881
1882
1883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
1892
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1903
// Channel counts 16, 40, 64, 88 and 112, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1915
// Channel counts 16, 40, 64, 88 and 112, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1927
// Runs the kernel once for every channel count from 1 through 7, i.e. all
// counts below the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1938
// Runs the kernel once for every channel count from 9 through 15, i.e. all
// counts strictly between the cr setting (8) and twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1949
// Channel counts 9 through 15, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1961
// Channel counts 9 through 15, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1973
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1985
// Width-3 runs over channel counts 1, 8, ..., 36, repeated for every step
// value from 2 through 25 (the kr setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2000
// Width-5 runs with an output stride of 43 over channel counts 1, 8, 15, 22,
// 29 and 36. The loop counter is forwarded to channels() so that each
// iteration exercises a distinct channel count; passing a fixed value would
// just repeat one configuration. All swept counts stay below the stride of 43.
// NOTE: this file is auto-generated; mirror this change in the generator
// template, otherwise it is lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2013
// Width-3 runs over channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2026
// Width-3 runs over channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2039
// Channel counts 16, 40, 64, 88 and 112, each run with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2051
// For every zero_index from 0 through 24, sweeps channel counts 16, 40, 64, 88
// and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2066 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2067
2068
2069 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
2078
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2089
// Channel counts 16, 40, 64, 88 and 112, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2101
// Channel counts 16, 40, 64, 88 and 112, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2113
// Runs the kernel once for every channel count from 1 through 7, i.e. all
// counts below the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2124
// Runs the kernel once for every channel count from 9 through 15, i.e. all
// counts strictly between the cr setting (8) and twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2135
// Channel counts 9 through 15, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2147
// Channel counts 9 through 15, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2159
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2171
// Width-3 runs over channel counts 1, 8, ..., 36, repeated for every step
// value from 2 through 25 (the kr setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2186
// Width-5 runs with an output stride of 43 over channel counts 1, 8, 15, 22,
// 29 and 36. The loop counter is forwarded to channels() so that each
// iteration exercises a distinct channel count; passing a fixed value would
// just repeat one configuration. All swept counts stay below the stride of 43.
// NOTE: this file is auto-generated; mirror this change in the generator
// template, otherwise it is lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2199
// Width-3 runs over channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2212
// Width-3 runs over channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2225
// Channel counts 16, 40, 64, 88 and 112, each run with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
2237
// For every zero_index from 0 through 24, sweeps channel counts 16, 40, 64, 88
// and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2252 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2253
2254
2255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2264
// Sweeps channel counts 16, 40, 64, 88 and 112 -- all multiples of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2275
// Channel counts 16, 40, 64, 88 and 112, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2287
// Channel counts 16, 40, 64, 88 and 112, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2299
// Runs the kernel once for every channel count from 1 through 7, i.e. all
// counts below the cr setting (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2310
// Runs the kernel once for every channel count from 9 through 15, i.e. all
// counts strictly between the cr setting (8) and twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2321
// Channel counts 9 through 15, each run with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2333
// Channel counts 9 through 15, each run with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2345
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2357
// Width-3 runs over channel counts 1, 8, ..., 36, repeated for every step
// value from 2 through 25 (the kr setting).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_value = 2; step_value <= 25; step_value++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(step_value)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2372
// Width-5 runs with an output stride of 43 over channel counts 1, 8, 15, 22,
// 29 and 36. The loop counter is forwarded to channels() so that each
// iteration exercises a distinct channel count; passing a fixed value would
// just repeat one configuration. All swept counts stay below the stride of 43.
// NOTE: this file is auto-generated; mirror this change in the generator
// template, otherwise it is lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2385
// Width-3 runs over channel counts 1, 8, ..., 36 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2398
// Width-3 runs over channel counts 1, 8, ..., 36 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2411
// Channel counts 16, 40, 64, 88 and 112 with the tester's input_offset set
// to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2423
// For each zero_index in [0, 25) — one per value below kr — runs channel
// counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t z = 0; z < 25; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2438 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2439
2440
2441 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count (8) equal to the cr value given to the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2450
// Channel counts 16, 40, 64, 88 and 112 — each a multiple of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2461
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2473
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2485
// Channel counts 1 through 7 — all below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2496
// Channel counts 9 through 15 — above cr = 8 and below twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2507
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2519
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2531
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2543
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36; for each count the
// step setting is swept from 2 up to 25 (25 is also the kr value).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2558
// Width-5 runs with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); previously every iteration
// passed the constant 8, so the sweep repeated one configuration six times.
// Every swept count stays below the output stride of 43.
// NOTE(review): this file is generated (see the file header); the same change
// is presumably needed in tools/generate-dwconv-test.py — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2571
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2584
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2597
// Channel counts 16, 40, 64, 88 and 112 with the tester's input_offset set
// to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2609
// For each zero_index in [0, 25) — one per value below kr — runs channel
// counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t z = 0; z < 25; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2624 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2625
2626
2627 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count (8) equal to the cr value given to the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2636
// Channel counts 16, 40, 64, 88 and 112 — each a multiple of cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2647
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2659
// Channel counts 16, 40, 64, 88 and 112 (multiples of cr = 8) with the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2671
// Channel counts 1 through 7 — all below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2682
// Channel counts 9 through 15 — above cr = 8 and below twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2693
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2705
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2717
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2729
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36; for each count the
// step setting is swept from 2 up to 25 (25 is also the kr value).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2744
// Width-5 runs with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); previously every iteration
// passed the constant 8, so the sweep repeated one configuration six times.
// Every swept count stays below the output stride of 43.
// NOTE(review): this file is generated (see the file header); the same change
// is presumably needed in tools/generate-dwconv-test.py — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2757
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2770
// Width-3 runs over channel counts 1, 8, 15, 22, 29 and 36 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2783
// Channel counts 16, 40, 64, 88 and 112 with the tester's input_offset set
// to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2795
// For each zero_index in [0, 25) — one per value below kr — runs channel
// counts 16, 40, 64, 88 and 112 with input_offset 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t z = 0; z < 25; z++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(z).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2810 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2811
2812
2813 #if XNN_ARCH_ARM
// Single run with the channel count (16) equal to the cr value given to the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2822
// Channel counts 32, 80, 128, 176 and 224 — each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2833
// Channel counts 32, 80, 128, 176 and 224 (multiples of cr = 16) with the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2845
// Channel counts 32, 80, 128, 176 and 224 (multiples of cr = 16) with the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2857
// Channel counts 1 through 15 — all below cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2868
// Channel counts 17 through 31 — above cr = 16 and below twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2879
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2891
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2903
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2915
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76; for each count
// the step setting takes the values 2 and 3 (3 is also the kr value).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; s++) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2930
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); previously every iteration
// passed the constant 16, so the sweep repeated one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE(review): this file is generated (see the file header); the same change
// is presumably needed in tools/generate-dwconv-test.py — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2943
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2956
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2969
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2981
// For each zero_index in [0, 3) — one per value below kr — runs channel
// counts 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AARCH32_NEONV8_MLA8_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t z = 0; z < 3; z++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).zero_index(z).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2996 #endif // XNN_ARCH_ARM
2997
2998
2999 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count (16) equal to the cr value given to the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3008
// Channel counts 32, 80, 128, 176 and 224 — each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3019
// Channel counts 32, 80, 128, 176 and 224 (multiples of cr = 16) with the
// tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3031
// Channel counts 32, 80, 128, 176 and 224 (multiples of cr = 16) with the
// tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3043
// Channel counts 1 through 15 — all below cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3054
// Channel counts 17 through 31 — above cr = 16 and below twice that value.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3065
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3077
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3089
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3101
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76; for each count
// the step setting takes the values 2 and 3 (3 is also the kr value).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 3; s++) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3116
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); previously every iteration
// passed the constant 16, so the sweep repeated one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE(review): this file is generated (see the file header); the same change
// is presumably needed in tools/generate-dwconv-test.py — confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3129
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3142
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3155
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3167
// For each zero_index in [0, 3) — one per value below kr — runs channel
// counts 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t z = 0; z < 3; z++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).input_offset(304).zero_index(z).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3183
3184
3185 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3194
// Sweeps channel counts 32, 80, 128, 176 and 224, each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3205
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3217
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3229
// Covers every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3240
// Covers channel counts 17 through 31, i.e. above cr = 16 and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3251
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3263
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3275
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3287
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3302
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76 (all below the stride). The loop variable is forwarded to
// channels(); a fixed channels(16) would merely repeat one configuration six
// times and leave the loop variable unused.
// NOTE: this file is generated (see the header); mirror this change in the
// generator template so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3315
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3328
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3341
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3353
// For each zero_index in 0..2, sweeps channels 32, 80, 128, 176, 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3368 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3369
3370
3371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3380
// Sweeps channel counts 32, 80, 128, 176 and 224, each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3391
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3403
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3415
// Covers every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3426
// Covers channel counts 17 through 31, i.e. above cr = 16 and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3437
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3449
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3461
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3473
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3488
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76 (all below the stride). The loop variable is forwarded to
// channels(); a fixed channels(16) would merely repeat one configuration six
// times and leave the loop variable unused.
// NOTE: this file is generated (see the header); mirror this change in the
// generator template so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3501
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3514
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3527
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3539
// For each zero_index in 0..2, sweeps channels 32, 80, 128, 176, 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3554 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3555
3556
3557 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(3).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3566
// Sweeps channel counts 32, 80, 128, 176 and 224, each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3577
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3589
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3601
// Covers every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3612
// Covers channel counts 17 through 31, i.e. above cr = 16 and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3623
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3635
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3647
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3659
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3674
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76 (all below the stride). The loop variable is forwarded to
// channels(); a fixed channels(16) would merely repeat one configuration six
// times and leave the loop variable unused.
// NOTE: this file is generated (see the header); mirror this change in the
// generator template so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3687
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3700
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3713
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3725
// For each zero_index in 0..2, sweeps channels 32, 80, 128, 176, 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3740 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3741
3742
3743 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3752
// Sweeps channel counts 32, 80, 128, 176 and 224, each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3763
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3775
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3787
// Covers every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3798
// Covers channel counts 17 through 31, i.e. above cr = 16 and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3809
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3821
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3833
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3845
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3860
// Width-5 runs with output_stride set to 83 over channel counts 1, 16, 31, 46,
// 61 and 76 (all below the stride). The loop variable is forwarded to
// channels(); a fixed channels(16) would merely repeat one configuration six
// times and leave the loop variable unused.
// NOTE: this file is generated (see the header); mirror this change in the
// generator template so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3873
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3886
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3899
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3911
// For each zero_index in 0..8, sweeps channels 32, 80, 128, 176, 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3926 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3927
3928
3929 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3938
// Sweeps channel counts 32, 80, 128, 176 and 224, each a multiple of cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3949
// Multiple-of-16 channel sweep (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3961
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3973
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3984
// Covers channel counts 17 through 31, i.e. above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3995
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4007
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4019
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4031
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76, trying
// every step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4046
// Runs with width 5 and output_stride 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable is forwarded to channels() so that each
// iteration covers a different channel count (previously a constant 16 was
// passed, repeating one configuration six times). The stride of 83 is larger
// than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4059
// Runs with width 3 and qmin 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4072
// Runs with width 3 and qmax 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4085
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4097
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4112 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4113
4114
4115 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4124
// Sweeps channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4135
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4147
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4159
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4170
// Covers channel counts 17 through 31, i.e. above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4181
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4193
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4205
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4217
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76, trying
// every step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4232
// Runs with width 5 and output_stride 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable is forwarded to channels() so that each
// iteration covers a different channel count (previously a constant 16 was
// passed, repeating one configuration six times). The stride of 83 is larger
// than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4245
// Runs with width 3 and qmin 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4258
// Runs with width 3 and qmax 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4271
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4283
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4298 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4299
4300
4301 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4310
// Sweeps channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4321
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4333
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4345
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4356
// Covers channel counts 17 through 31, i.e. above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4367
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4379
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4391
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4403
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76, trying
// every step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4418
// Runs with width 5 and output_stride 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable is forwarded to channels() so that each
// iteration covers a different channel count (previously a constant 16 was
// passed, repeating one configuration six times). The stride of 83 is larger
// than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4431
// Runs with width 3 and qmin 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4444
// Runs with width 3 and qmax 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4457
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4469
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4484 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4485
4486
4487 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4496
// Sweeps channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4507
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4519
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4531
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4542
// Covers channel counts 17 through 31, i.e. above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4553
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4565
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4577
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4589
// Runs with width 3 over channel counts 1, 16, 31, 46, 61 and 76, trying
// every step value from 2 through 9 for each channel count.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604
// Runs with width 5 and output_stride 83 over channel counts 1, 16, 31, 46,
// 61 and 76. The loop variable is forwarded to channels() so that each
// iteration covers a different channel count (previously a constant 16 was
// passed, repeating one configuration six times). The stride of 83 is larger
// than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4617
// Runs with width 3 and qmin 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4630
// Runs with width 3 and qmax 128 over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4643
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4655
// For every zero_index from 0 through 8, sweeps channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4670 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4671
4672
4673 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4682
// Sweeps channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4693
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4705
// Sweeps multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4717
// Covers every channel count below cr (16): 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4728
// Covers channel counts 17 through 31, i.e. above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4739
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4751
// Every channel count from 17 through 31, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4763
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4775
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4790
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels(); a hard-coded
// channels(16) here would make all six iterations exercise one and the same
// configuration and leave the swept channel counts untested.
// NOTE(review): assumes the tester accepts output_stride(83) for every swept
// channel count (largest is 76) - confirm against DWConvMicrokernelTester.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change or this edit will be overwritten - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4803
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4816
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4829
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4841
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4856 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4857
4858
4859 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: a single run with exactly 16 channels, matching cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4868
// Channel counts 32, 80, 128, 176 and 224 - each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4879
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4891
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4903
// Every channel count from 1 through 15, i.e. fewer channels than cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4914
// Every channel count from 17 through 31, i.e. more than cr(16) but less than 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4925
// Every channel count from 17 through 31, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4937
// Every channel count from 17 through 31, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4949
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4961
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4976
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels(); a hard-coded
// channels(16) here would make all six iterations exercise one and the same
// configuration and leave the swept channel counts untested.
// NOTE(review): assumes the tester accepts output_stride(83) for every swept
// channel count (largest is 76) - confirm against DWConvMicrokernelTester.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change or this edit will be overwritten - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4989
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5002
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5015
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5027
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld128,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5042 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5043
5044
5045 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: a single run with exactly 16 channels, matching cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5054
// Channel counts 32, 80, 128, 176 and 224 - each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5065
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5077
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5089
// Every channel count from 1 through 15, i.e. fewer channels than cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5100
// Every channel count from 17 through 31, i.e. more than cr(16) but less than 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5111
// Every channel count from 17 through 31, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5123
// Every channel count from 17 through 31, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5135
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5147
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5162
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels(); a hard-coded
// channels(16) here would make all six iterations exercise one and the same
// configuration and leave the swept channel counts untested.
// NOTE(review): assumes the tester accepts output_stride(83) for every swept
// channel count (largest is 76) - confirm against DWConvMicrokernelTester.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change or this edit will be overwritten - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5175
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5188
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5201
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5213
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5228 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5229
5230
5231 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: a single run with exactly 16 channels, matching cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5240
// Channel counts 32, 80, 128, 176 and 224 - each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5251
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5263
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5275
// Every channel count from 1 through 15, i.e. fewer channels than cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5286
// Every channel count from 17 through 31, i.e. more than cr(16) but less than 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5297
// Every channel count from 17 through 31, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5309
// Every channel count from 17 through 31, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5321
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5333
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5348
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61
// and 76. The loop variable is forwarded to channels(); a hard-coded
// channels(16) here would make all six iterations exercise one and the same
// configuration and leave the swept channel counts untested.
// NOTE(review): assumes the tester accepts output_stride(83) for every swept
// channel count (largest is 76) - confirm against DWConvMicrokernelTester.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change or this edit will be overwritten - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5361
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5374
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5387
// Channel counts 32, 80, 128, 176 and 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5399
// For each zero_index from 0 through 8, runs channel counts 32, 80, 128, 176
// and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul8_ld128,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5414 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5415
5416
5417 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: a single run with exactly 16 channels, matching cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5426
// Channel counts 32, 80, 128, 176 and 224 - each a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5437
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5449
// Channel counts 32, 80, 128, 176 and 224 (each a multiple of 16), with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5461
// Every channel count from 1 through 15, i.e. fewer channels than cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5472
// Every channel count from 17 through 31, i.e. more than cr(16) but less than 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5483
// Every channel count from 17 through 31, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5495
// Every channel count from 17 through 31, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5507
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5519
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5534
// width(5) run with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// channels() receives the loop variable so that each iteration covers a distinct
// channel count; with a hard-coded channels(16) all six iterations would be
// identical. The stride of 83 exceeds the largest channel count in the sweep (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5547
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5560
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5573
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(9).channels(c).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5585
// For each zero_index() from 0 through 8, sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c).input_offset(304).zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5600 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5601
5602
5603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16) together with cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(16).kr(25).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5612
// Channel counts 32, 80, 128, 176, 224 -- every value is a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5623
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5635
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5647
// Every channel count from 1 through 15, i.e. fewer than 16 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5658
// Every channel count from 17 through 31: more than 16, never a multiple of it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5669
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5681
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5693
// width(3) run for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5705
// width(3) run for channel counts 1, 16, 31, 46, 61, 76, each combined with
// every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5720
// width(5) run with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// channels() receives the loop variable so that each iteration covers a distinct
// channel count; with a hard-coded channels(16) all six iterations would be
// identical. The stride of 83 exceeds the largest channel count in the sweep (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5733
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5746
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5759
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5771
// For each zero_index() from 0 through 24, sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 24; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5786 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5787
5788
5789 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16) together with cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(16).kr(25).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5798
// Channel counts 32, 80, 128, 176, 224 -- every value is a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5809
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5821
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5833
// Every channel count from 1 through 15, i.e. fewer than 16 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5844
// Every channel count from 17 through 31: more than 16, never a multiple of it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5855
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5867
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5879
// width(3) run for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5891
// width(3) run for channel counts 1, 16, 31, 46, 61, 76, each combined with
// every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5906
// width(5) run with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// channels() receives the loop variable so that each iteration covers a distinct
// channel count; with a hard-coded channels(16) all six iterations would be
// identical. The stride of 83 exceeds the largest channel count in the sweep (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5919
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5932
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5945
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5957
// For each zero_index() from 0 through 24, sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 24; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5972 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5973
5974
5975 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16) together with cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(16).kr(25).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5984
// Channel counts 32, 80, 128, 176, 224 -- every value is a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5995
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6007
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6019
// Every channel count from 1 through 15, i.e. fewer than 16 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6030
// Every channel count from 17 through 31: more than 16, never a multiple of it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6041
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6053
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6065
// width(3) run for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6077
// width(3) run for channel counts 1, 16, 31, 46, 61, 76, each combined with
// every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6092
// width(5) run with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// channels() receives the loop variable so that each iteration covers a distinct
// channel count; with a hard-coded channels(16) all six iterations would be
// identical. The stride of 83 exceeds the largest channel count in the sweep (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6105
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6118
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6131
// Channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6143
// For each zero_index() from 0 through 24, sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 24; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6158 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6159
6160
6161 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: channels(16) together with cr(16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(16).kr(25).channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6170
// Channel counts 32, 80, 128, 176, 224 -- every value is a multiple of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6181
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6193
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6205
// Every channel count from 1 through 15, i.e. fewer than 16 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6216
// Every channel count from 17 through 31: more than 16, never a multiple of it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6227
// Every channel count from 17 through 31 with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6239
// Every channel count from 17 through 31 with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6251
// width(3) run for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6263
// width(3) run for channel counts 1, 16, 31, 46, 61, 76, each combined with
// every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6278
// width(5) run with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// channels() receives the loop variable so that each iteration covers a distinct
// channel count; with a hard-coded channels(16) all six iterations would be
// identical. The stride of 83 exceeds the largest channel count in the sweep (76).
// NOTE(review): this file is generated -- mirror the change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6291
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6304
// width(3) run for channel counts 1, 16, 31, 46, 61, 76 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6317
// input_offset: input offset of 304, for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6329
// zero: every zero_index in [0, 25) combined with input offset 304, for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .input_offset(304)
        .zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6344 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6345
6346
6347 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// c_eq_16: single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(25)
    .channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6356
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6367
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6379
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6391
// c_lt_16: every channel count from 1 through 15 (below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6402
// c_gt_16: every channel count from 17 through 31 (above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6413
// c_gt_16_with_qmin: channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6425
// c_gt_16_with_qmax: channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6437
// multipixel: width 3, for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6449
// multipixel_with_step: width 3, every step value in [2, 25], for channel
// counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6464
// multipixel_with_output_stride: width 5 with an output stride of 83, for
// channel counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made every iteration re-run the identical configuration.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py - mirror this change in the generator.
// NOTE(review): assumes the tester accepts output_stride (83) >= channels
// (max 76 here) - confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6477
// multipixel_with_qmin: width 3 with qmin set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6490
// multipixel_with_qmax: width 3 with qmax set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6503
// input_offset: input offset of 304, for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6515
// zero: every zero_index in [0, 25) combined with input offset 304, for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .input_offset(304)
        .zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6530 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6531
6532
6533 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// c_eq_16: single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25)
    .channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6542
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6553
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6565
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6577
// c_lt_16: every channel count from 1 through 15 (below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6588
// c_gt_16: every channel count from 17 through 31 (above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6599
// c_gt_16_with_qmin: channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6611
// c_gt_16_with_qmax: channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6623
// multipixel: width 3, for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6635
// multipixel_with_step: width 3, every step value in [2, 25], for channel
// counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6650
// multipixel_with_output_stride: width 5 with an output stride of 83, for
// channel counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made every iteration re-run the identical configuration.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py - mirror this change in the generator.
// NOTE(review): assumes the tester accepts output_stride (83) >= channels
// (max 76 here) - confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
6663
// multipixel_with_qmin: width 3 with qmin set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6676
// multipixel_with_qmax: width 3 with qmax set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6689
// input_offset: input offset of 304, for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6701
// zero: every zero_index in [0, 25) combined with input offset 304, for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .input_offset(304)
        .zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6716 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6717
6718
6719 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// c_eq_16: single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25)
    .channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6728
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6739
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6751
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6763
// c_lt_16: every channel count from 1 through 15 (below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6774
// c_gt_16: every channel count from 17 through 31 (above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6785
// c_gt_16_with_qmin: channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6797
// c_gt_16_with_qmax: channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6809
// multipixel: width 3, for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6821
// multipixel_with_step: width 3, every step value in [2, 25], for channel
// counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6836
// multipixel_with_output_stride: width 5 with an output stride of 83, for
// channel counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made every iteration re-run the identical configuration.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py - mirror this change in the generator.
// NOTE(review): assumes the tester accepts output_stride (83) >= channels
// (max 76 here) - confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
6849
// multipixel_with_qmin: width 3 with qmin set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6862
// multipixel_with_qmax: width 3 with qmax set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6875
// input_offset: input offset of 304, for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887
// zero: every zero_index in [0, 25) combined with input offset 304, for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .input_offset(304)
        .zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6902 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6903
6904
6905 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// c_eq_16: single run with the channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(16).kr(25)
    .channels(16)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6914
// c_div_16: channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6925
// c_div_16_with_qmin: channel counts 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6937
// c_div_16_with_qmax: channel counts 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6949
// c_lt_16: every channel count from 1 through 15 (below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6960
// c_gt_16: every channel count from 17 through 31 (above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6971
// c_gt_16_with_qmin: channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6983
// c_gt_16_with_qmax: channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6995
// multipixel: width 3, for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7007
// multipixel_with_step: width 3, every step value in [2, 25], for channel
// counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7022
// multipixel_with_output_stride: width 5 with an output stride of 83, for
// channel counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made every iteration re-run the identical configuration.
// NOTE(review): the file header says this file is generated by
// tools/generate-dwconv-test.py - mirror this change in the generator.
// NOTE(review): assumes the tester accepts output_stride (83) >= channels
// (max 76 here) - confirm against DWConvMicrokernelTester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7035
// multipixel_with_qmin: width 3 with qmin set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7048
// multipixel_with_qmax: width 3 with qmax set to 128, for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7061
// input_offset: input offset of 304, for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25)
      .channels(c)
      .input_offset(304)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7073
// zero: every zero_index in [0, 25) combined with input offset 304, for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25)
        .channels(c)
        .input_offset(304)
        .zero_index(zi)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7088 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7089
7090
7091 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up16x25 neonv8_mul8_ld128 microkernel once with cr=16, kr=25 and
// channels=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7100
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 for
// channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7111
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// qmin=128 for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7123
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// qmax=128 for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7135
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 for every
// channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7146
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 for every
// channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7157
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// qmin=128 for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7169
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// qmax=128 for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7181
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// width=3 for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7193
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// width=3, sweeping channel counts 1, 16, 31, 46, 61, 76 and step values
// 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7208
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25, width=5
// and output_stride=83 for channel counts 1, 16, 31, 46, 61, 76.
// Each iteration passes the loop's channel count to the tester so the sweep
// covers distinct configurations; the output stride of 83 stays above the
// largest channel count visited (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7221
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25, width=3
// and qmin=128 for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7234
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25, width=3
// and qmax=128 for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7247
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// input_offset=304 for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7259
// Runs the up16x25 neonv8_mul8_ld128 microkernel with cr=16, kr=25 and
// input_offset=304, sweeping zero_index over 0..24 and channel counts over
// 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul8_ld128, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7274 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7275
7276
7277 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up16x25 neonv8_mul16 microkernel once with cr=16, kr=25 and
// channels=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7286
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 for channel
// counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7297
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and qmin=128
// for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7309
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and qmax=128
// for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7321
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 for every
// channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7332
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 for every
// channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7343
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and qmin=128
// for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7355
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and qmax=128
// for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7367
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and width=3
// for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7379
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and width=3,
// sweeping channel counts 1, 16, 31, 46, 61, 76 and step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7394
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25, width=5 and
// output_stride=83 for channel counts 1, 16, 31, 46, 61, 76.
// Each iteration passes the loop's channel count to the tester so the sweep
// covers distinct configurations; the output stride of 83 stays above the
// largest channel count visited (76).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7407
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25, width=3 and
// qmin=128 for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7420
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25, width=3 and
// qmax=128 for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7433
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and
// input_offset=304 for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7445
// Runs the up16x25 neonv8_mul16 microkernel with cr=16, kr=25 and
// input_offset=304, sweeping zero_index over 0..24 and channel counts over
// 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7460 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7461
7462
7463 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up24x9 neon_mul16 microkernel once with cr=24, kr=9 and
// channels=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7472
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 for channel counts
// 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7483
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and qmin=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7495
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and qmax=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7507
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 for every channel
// count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7518
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 for every channel
// count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7529
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and qmin=128 for
// every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7541
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and qmax=128 for
// every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7553
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and width=3 for
// channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7565
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and width=3,
// sweeping channel counts 1, 24, 47, 70, 93, 116 and step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7580
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9, width=5 and
// output_stride=127 for channel counts 1, 24, 47, 70, 93, 116.
// Each iteration passes the loop's channel count to the tester so the sweep
// covers distinct configurations; the output stride of 127 stays above the
// largest channel count visited (116).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7593
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9, width=3 and
// qmin=128 for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7606
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9, width=3 and
// qmax=128 for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7619
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and
// input_offset=464 for channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7631
// Runs the up24x9 neon_mul16 microkernel with cr=24, kr=9 and
// input_offset=464, sweeping zero_index over 0..8 and channel counts over
// 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7646 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7647
7648
7649 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up24x9 neonv8_mul16 microkernel once with cr=24, kr=9 and
// channels=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7658
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 for channel
// counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7669
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and qmin=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7681
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and qmax=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7693
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 for every channel
// count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7704
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 for every channel
// count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7715
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and qmin=128 for
// every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7727
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and qmax=128 for
// every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7739
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and width=3 for
// channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7751
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and width=3,
// sweeping channel counts 1, 24, 47, 70, 93, 116 and step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).width(3).step(step_size);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7766
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9, width=5 and
// output_stride=127 for channel counts 1, 24, 47, 70, 93, 116.
// Each iteration passes the loop's channel count to the tester so the sweep
// covers distinct configurations; the output stride of 127 stays above the
// largest channel count visited (116).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7779
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9, width=3 and
// qmin=128 for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7792
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9, width=3 and
// qmax=128 for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7805
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and
// input_offset=464 for channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(num_channels).input_offset(464);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7817
// Runs the up24x9 neonv8_mul16 microkernel with cr=24, kr=9 and
// input_offset=464, sweeping zero_index over 0..8 and channel counts over
// 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7832 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7833
7834
7835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up24x25 neon_mul16 microkernel once with cr=24, kr=25 and
// channels=24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7844
// Runs the up24x25 neon_mul16 microkernel with cr=24, kr=25 for channel
// counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7855
// Runs the up24x25 neon_mul16 microkernel with cr=24, kr=25 and qmin=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7867
// Runs the up24x25 neon_mul16 microkernel with cr=24, kr=25 and qmax=128 for
// channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7879
// Covers every channel count below the cr value: 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels <= 23; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7890
// Covers every channel count strictly between one and two cr values: 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7901
// Channel counts 25 through 47, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7913
// Channel counts 25 through 47, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7925
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7937
// Width-3 runs over channel counts 1, 24, ..., 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7952
// Width-5 runs with an output stride of 127 over channel counts 1, 24, 47, 70,
// 93 and 116. The loop variable is forwarded to channels() so each iteration
// covers a distinct channel count; a fixed channels(24) would repeat one
// identical configuration six times. The largest swept count (116) stays below
// the output stride of 127.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7965
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7978
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7991
// Multiples-of-24 channel sweep (48, 120, 192, 264, 336) with input_offset 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 24;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8003
// For each zero_index 0 through 24, repeats the multiples-of-24 channel sweep
// (48..336) with input_offset 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 24;
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8018 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8019
8020
8021 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up24x25 NEONv8 mul16 kernel once with channels equal to the cr value (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  const uint32_t num_channels = 24;
  DWConvMicrokernelTester()
    .cr(24)
    .kr(25)
    .channels(num_channels)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8030
// Sweeps channel counts that are exact multiples of 24: 48, 120, 192, 264, 336
// (2, 5, 8, 11 and 14 times 24), with no qmin/qmax override.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 24;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8041
// Same multiples-of-24 channel sweep (48..336) as c_div_24, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 24;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8053
// Same multiples-of-24 channel sweep (48..336) as c_div_24, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 24;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8065
// Covers every channel count below the cr value: 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels <= 23; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8076
// Covers every channel count strictly between one and two cr values: 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8087
// Channel counts 25 through 47, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8099
// Channel counts 25 through 47, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels <= 47; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8111
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8123
// Width-3 runs over channel counts 1, 24, ..., 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8138
// Width-5 runs with an output stride of 127 over channel counts 1, 24, 47, 70,
// 93 and 116. The loop variable is forwarded to channels() so each iteration
// covers a distinct channel count; a fixed channels(24) would repeat one
// identical configuration six times. The largest swept count (116) stays below
// the output stride of 127.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8151
// Width-3 runs over channel counts 1, 24, ..., 116 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8164
// Width-3 runs over channel counts 1, 24, ..., 116 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8177
// Multiples-of-24 channel sweep (48, 120, 192, 264, 336) with input_offset 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 24;
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(num_channels)
      .input_offset(464)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8189
// For each zero_index 0 through 24, repeats the multiples-of-24 channel sweep
// (48..336) with input_offset 464.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 24;
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(num_channels)
        .input_offset(464)
        .zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8204 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8205
8206
8207 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up32x9 NEON mul16 kernel once with channels equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t num_channels = 32;
  DWConvMicrokernelTester()
    .cr(32)
    .kr(9)
    .channels(num_channels)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8216
// Sweeps channel counts that are exact multiples of 32: 64, 160, 256, 352, 448
// (2, 5, 8, 11 and 14 times 32), with no qmin/qmax override.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8227
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8239
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8251
// Covers every channel count below the cr value: 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8262
// Covers every channel count strictly between one and two cr values: 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8273
// Channel counts 33 through 63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8285
// Channel counts 33 through 63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8297
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8309
// Width-3 runs over channel counts 1, 32, ..., 156, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8324
// Width-5 runs with an output stride of 163 over channel counts 1, 32, 63, 94,
// 125 and 156. The loop variable is forwarded to channels() so each iteration
// covers a distinct channel count; a fixed channels(32) would repeat one
// identical configuration six times. The largest swept count (156) stays below
// the output stride of 163.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8337
// Width-3 runs over channel counts 1, 32, ..., 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8350
// Width-3 runs over channel counts 1, 32, ..., 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8363
// Multiples-of-32 channel sweep (64, 160, 256, 352, 448) with input_offset 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8375
// For each zero_index 0 through 8, repeats the multiples-of-32 channel sweep
// (64..448) with input_offset 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 32;
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8390 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8391
8392
8393 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up32x9 NEONv8 mul16 kernel once with channels equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  const uint32_t num_channels = 32;
  DWConvMicrokernelTester()
    .cr(32)
    .kr(9)
    .channels(num_channels)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8402
// Sweeps channel counts that are exact multiples of 32: 64, 160, 256, 352, 448
// (2, 5, 8, 11 and 14 times 32), with no qmin/qmax override.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8413
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8425
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8437
// Covers every channel count below the cr value: 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8448
// Covers every channel count strictly between one and two cr values: 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8459
// Channel counts 33 through 63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8471
// Channel counts 33 through 63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8483
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8495
// Width-3 runs over channel counts 1, 32, ..., 156, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8510
// Width-5 runs with an output stride of 163 over channel counts 1, 32, 63, 94,
// 125 and 156. The loop variable is forwarded to channels() so each iteration
// covers a distinct channel count; a fixed channels(32) would repeat one
// identical configuration six times. The largest swept count (156) stays below
// the output stride of 163.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8523
// Width-3 runs over channel counts 1, 32, ..., 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8536
// Width-3 runs over channel counts 1, 32, ..., 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8549
// Multiples-of-32 channel sweep (64, 160, 256, 352, 448) with input_offset 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(num_channels)
      .input_offset(592)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8561
// For each zero_index 0 through 8, repeats the multiples-of-32 channel sweep
// (64..448) with input_offset 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
      const uint32_t num_channels = tiles * 32;
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(num_channels)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8577
8578
8579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the up32x25 NEON mul16 kernel once with channels equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t num_channels = 32;
  DWConvMicrokernelTester()
    .cr(32)
    .kr(25)
    .channels(num_channels)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8588
// Sweeps channel counts that are exact multiples of 32: 64, 160, 256, 352, 448
// (2, 5, 8, 11 and 14 times 32), with no qmin/qmax override.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8599
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8611
// Same multiples-of-32 channel sweep (64..448) as c_div_32, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t tiles = 2; tiles < 16; tiles += 3) {
    const uint32_t num_channels = tiles * 32;
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8623
// Covers every channel count below the cr value: 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels <= 31; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8634
// Covers every channel count strictly between one and two cr values: 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8645
// Channel counts 33 through 63, with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8657
// Channel counts 33 through 63, with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels <= 63; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8669
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8681
// width(3) runs for channel counts 1, 32, ..., 156, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8696
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156.
//
// The channel count must come from the loop variable: with a constant
// .channels(32) the loop variable is unused and all six iterations test the
// same configuration. Every swept value stays below the output stride of 163.
// NOTE(review): this file is auto-generated; apply the same change to the
// template in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8709
// width(3) runs with qmin set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8722
// width(3) runs with qmax set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8735
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8747
// For each zero_index from 0 through 24, runs with input_offset(592) over
// channel counts 64, 160, 256, 352 and 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8762 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8763
8764
8765 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8774
// Checks channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8785
// Checks channel counts 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8797
// Checks channel counts 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8809
// Checks every channel count from 1 through 31 (all values below cr = 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c <= 31; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8820
// Checks every channel count from 33 through 63 with cr = 32 and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8831
// Checks every channel count from 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8843
// Checks every channel count from 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c <= 63; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8855
// width(3) runs for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8867
// width(3) runs for channel counts 1, 32, ..., 156, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8882
// width(5) runs with output_stride(163) for channel counts 1, 32, 63, 94, 125
// and 156.
//
// The channel count must come from the loop variable: with a constant
// .channels(32) the loop variable is unused and all six iterations test the
// same configuration. Every swept value stays below the output stride of 163.
// NOTE(review): this file is auto-generated; apply the same change to the
// template in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8895
// width(3) runs with qmin set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8908
// width(3) runs with qmax set to 128 for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8921
// input_offset(592) runs for channel counts 64, 160, 256, 352 and 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8933
// For each zero_index from 0 through 24, runs with input_offset(592) over
// channel counts 64, 160, 256, 352 and 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8948 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8949
8950
8951 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8) and kr = 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
8960
// Checks channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
8971
// Checks channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
8983
// Checks channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
8995
// Checks every channel count from 1 through 7 (all values below cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9006
// Checks every channel count from 9 through 15 with cr = 8 and kr = 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9017
// Checks every channel count from 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9029
// Checks every channel count from 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9041
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9053
// width(3) runs for channel counts 1, 8, ..., 36, each combined with step
// values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 3; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9068
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The channel count must come from the loop variable: with a constant
// .channels(8) the loop variable is unused and all six iterations test the
// same configuration. Every swept value stays below the output stride of 43.
// NOTE(review): this file is auto-generated; apply the same change to the
// template in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9081
// width(3) runs with qmin set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9094
// width(3) runs with qmax set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9107
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9119
// For each zero_index from 0 through 2, runs with input_offset(176) over
// channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9134 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9135
9136
9137 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8) and kr = 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(3).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
9146
// Checks channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9157
// Checks channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9169
// Checks channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9181
// Checks every channel count from 1 through 7 (all values below cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9192
// Checks every channel count from 9 through 15 with cr = 8 and kr = 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9203
// Checks every channel count from 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9215
// Checks every channel count from 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9227
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9239
// width(3) runs for channel counts 1, 8, ..., 36, each combined with step
// values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 3; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9254
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The channel count must come from the loop variable: with a constant
// .channels(8) the loop variable is unused and all six iterations test the
// same configuration. Every swept value stays below the output stride of 43.
// NOTE(review): this file is auto-generated; apply the same change to the
// template in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9267
// width(3) runs with qmin set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9280
// width(3) runs with qmax set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9293
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9305
// For each zero_index from 0 through 2, runs with input_offset(176) over
// channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X3__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9320 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9321
9322
9323 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8) and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
9332
// Checks channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9343
// Checks channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9355
// Checks channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9367
// Checks every channel count from 1 through 7 (all values below cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c <= 7; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9378
// Checks every channel count from 9 through 15 with cr = 8 and kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9389
// Checks every channel count from 9 through 15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9401
// Checks every channel count from 9 through 15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c <= 15; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9413
// width(3) runs for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9425
// width(3) runs for channel counts 1, 8, ..., 36, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9440
// width(5) runs with output_stride(43) for channel counts 1, 8, 15, 22, 29
// and 36.
//
// The channel count must come from the loop variable: with a constant
// .channels(8) the loop variable is unused and all six iterations test the
// same configuration. Every swept value stays below the output stride of 43.
// NOTE(review): this file is auto-generated; apply the same change to the
// template in tools/generate-dwconv-test.py so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9453
// width(3) runs with qmin set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9466
// width(3) runs with qmax set to 128 for channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9479
// input_offset(176) runs for channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9491
// For each zero_index 0..8, sweeps channel counts 16, 40, 64, 88, 112 with
// input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9506 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9507
9508
9509 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
9518
// Channel counts 16, 40, 64, 88, 112: each one is a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9529
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9541
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9553
// Every channel count from 1 through 7, i.e. all counts below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9564
// Every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9575
// Every channel count from 9 through 15, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9587
// Every channel count from 9 through 15, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9599
// Channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9611
// Channel counts 1, 8, 15, 22, 29, 36 crossed with step values 2 through 9,
// always with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9626
// Multi-pixel case: width(5) combined with output_stride(43).
// The sweep covers channel counts 1, 8, 15, 22, 29 and 36, all of which are
// smaller than the output stride of 43. The loop variable is forwarded to
// channels(); a hard-coded channels(8) would only repeat one identical
// configuration six times.
// NOTE: this file is generated (tools/generate-dwconv-test.py per the file
// header); the template needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
9639
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9652
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9665
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
9677
// For each zero_index 0..8, sweeps channel counts 16, 40, 64, 88, 112 with
// input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9692 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9693
9694
9695 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
9704
// Channel counts 16, 40, 64, 88, 112: each one is a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9715
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9727
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9739
// Every channel count from 1 through 7, i.e. all counts below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9750
// Every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9761
// Every channel count from 9 through 15, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9773
// Every channel count from 9 through 15, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9785
// Channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9797
// Channel counts 1, 8, 15, 22, 29, 36 crossed with step values 2 through 9,
// always with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9812
// Multi-pixel case: width(5) combined with output_stride(43).
// The sweep covers channel counts 1, 8, 15, 22, 29 and 36, all of which are
// smaller than the output stride of 43. The loop variable is forwarded to
// channels(); a hard-coded channels(8) would only repeat one identical
// configuration six times.
// NOTE: this file is generated (tools/generate-dwconv-test.py per the file
// header); the template needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
9825
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9838
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9851
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9863
// For each zero_index 0..8, sweeps channel counts 16, 40, 64, 88, 112 with
// input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9878 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9879
9880
9881 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
9890
// Channel counts 16, 40, 64, 88, 112: each one is a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9901
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9913
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9925
// Every channel count from 1 through 7, i.e. all counts below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9936
// Every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9947
// Every channel count from 9 through 15, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9959
// Every channel count from 9 through 15, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9971
// Channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
9983
// Channel counts 1, 8, 15, 22, 29, 36 crossed with step values 2 through 9,
// always with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9998
// Multi-pixel case: width(5) combined with output_stride(43).
// The sweep covers channel counts 1, 8, 15, 22, 29 and 36, all of which are
// smaller than the output stride of 43. The loop variable is forwarded to
// channels(); a hard-coded channels(8) would only repeat one identical
// configuration six times.
// NOTE: this file is generated (tools/generate-dwconv-test.py per the file
// header); the template needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10011
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10024
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10037
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10049
// For each zero_index 0..8, sweeps channel counts 16, 40, 64, 88, 112 with
// input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10064 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10065
10066
10067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
10076
// Channel counts 16, 40, 64, 88, 112: each one is a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10087
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10099
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10111
// Every channel count from 1 through 7, i.e. all counts below 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10122
// Every channel count from 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10133
// Every channel count from 9 through 15, with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10145
// Every channel count from 9 through 15, with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10157
// Channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10169
// Channel counts 1, 8, 15, 22, 29, 36 crossed with step values 2 through 9,
// always with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10184
// Multi-pixel case: width(5) combined with output_stride(43).
// The sweep covers channel counts 1, 8, 15, 22, 29 and 36, all of which are
// smaller than the output stride of 43. The loop variable is forwarded to
// channels(); a hard-coded channels(8) would only repeat one identical
// configuration six times.
// NOTE: this file is generated (tools/generate-dwconv-test.py per the file
// header); the template needs the same change or regeneration will undo it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10197
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10210
// Channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10223
// Channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
10235
// For each zero_index 0..8, sweeps channel counts 16, 40, 64, 88, 112 with
// input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10250 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10251
10252
10253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with kr(25) and channels(8), the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
10262
// Channel counts 16, 40, 64, 88, 112: each one is a multiple of 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10273
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
10285
// c_div_8_with_qmax: channel counts 16, 40, 64, 88, 112 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10297
// c_lt_8: every channel count from 1 through 7, i.e. below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10308
// c_gt_8: every channel count from 9 through 15, i.e. between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10319
// c_gt_8_with_qmin: channel counts 9 through 15 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10331
// c_gt_8_with_qmax: channel counts 9 through 15 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10343
// multipixel: width(3) runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10355
// multipixel_with_step: width(3) runs for channel counts 1, 8, ..., 36,
// each repeated for every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10370
// multipixel_with_output_stride: width(5) runs with output_stride(43) for
// channel counts 1, 8, 15, 22, 29, 36.
// Each iteration passes its own channel count to the tester, so the sweep
// covers counts below, equal to, and above cr = 8 instead of repeating a
// single channels(8) configuration. Every count stays below the stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
10383
// multipixel_with_qmin: width(3) runs for channel counts 1, 8, ..., 36
// with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10396
// multipixel_with_qmax: width(3) runs for channel counts 1, 8, ..., 36
// with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10409
// input_offset: channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10421
// zero: for every zero_index 0..24 (one per kr = 25 position), sweeps channel
// counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_tap = 0; zero_tap < 25; ++zero_tap) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_tap)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10436 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10437
10438
10439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_8: a single tester run where channels equals cr (both 8), kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
10448
// c_div_8: sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10459
// c_div_8_with_qmin: channel counts 16, 40, 64, 88, 112 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10471
// c_div_8_with_qmax: channel counts 16, 40, 64, 88, 112 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10483
// c_lt_8: every channel count from 1 through 7, i.e. below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10494
// c_gt_8: every channel count from 9 through 15, i.e. between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10505
// c_gt_8_with_qmin: channel counts 9 through 15 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10517
// c_gt_8_with_qmax: channel counts 9 through 15 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10529
// multipixel: width(3) runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10541
// multipixel_with_step: width(3) runs for channel counts 1, 8, ..., 36,
// each repeated for every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10556
// multipixel_with_output_stride: width(5) runs with output_stride(43) for
// channel counts 1, 8, 15, 22, 29, 36.
// Each iteration passes its own channel count to the tester, so the sweep
// covers counts below, equal to, and above cr = 8 instead of repeating a
// single channels(8) configuration. Every count stays below the stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
10569
// multipixel_with_qmin: width(3) runs for channel counts 1, 8, ..., 36
// with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10582
// multipixel_with_qmax: width(3) runs for channel counts 1, 8, ..., 36
// with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10595
// input_offset: channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
10607
// zero: for every zero_index 0..24 (one per kr = 25 position), sweeps channel
// counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_tap = 0; zero_tap < 25; ++zero_tap) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_tap)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10622 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10623
10624
10625 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_8: a single tester run where channels equals cr (both 8), kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
10634
// c_div_8: sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10645
// c_div_8_with_qmin: channel counts 16, 40, 64, 88, 112 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10657
// c_div_8_with_qmax: channel counts 16, 40, 64, 88, 112 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10669
// c_lt_8: every channel count from 1 through 7, i.e. below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10680
// c_gt_8: every channel count from 9 through 15, i.e. between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10691
// c_gt_8_with_qmin: channel counts 9 through 15 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10703
// c_gt_8_with_qmax: channel counts 9 through 15 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10715
// multipixel: width(3) runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10727
// multipixel_with_step: width(3) runs for channel counts 1, 8, ..., 36,
// each repeated for every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10742
// multipixel_with_output_stride: width(5) runs with output_stride(43) for
// channel counts 1, 8, 15, 22, 29, 36.
// Each iteration passes its own channel count to the tester, so the sweep
// covers counts below, equal to, and above cr = 8 instead of repeating a
// single channels(8) configuration. Every count stays below the stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10755
// multipixel_with_qmin: width(3) runs for channel counts 1, 8, ..., 36
// with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10768
// multipixel_with_qmax: width(3) runs for channel counts 1, 8, ..., 36
// with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10781
// input_offset: channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10793
// zero: for every zero_index 0..24 (one per kr = 25 position), sweeps channel
// counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_tap = 0; zero_tap < 25; ++zero_tap) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_tap)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10808 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10809
10810
10811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_8: a single tester run where channels equals cr (both 8), kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
10820
// c_div_8: sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10831
// c_div_8_with_qmin: channel counts 16, 40, 64, 88, 112 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10843
// c_div_8_with_qmax: channel counts 16, 40, 64, 88, 112 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10855
// c_lt_8: every channel count from 1 through 7, i.e. below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10866
// c_gt_8: every channel count from 9 through 15, i.e. between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10877
// c_gt_8_with_qmin: channel counts 9 through 15 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10889
// c_gt_8_with_qmax: channel counts 9 through 15 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10901
// multipixel: width(3) runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10913
// multipixel_with_step: width(3) runs for channel counts 1, 8, ..., 36,
// each repeated for every step value from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10928
// multipixel_with_output_stride: width(5) runs with output_stride(43) for
// channel counts 1, 8, 15, 22, 29, 36.
// Each iteration passes its own channel count to the tester, so the sweep
// covers counts below, equal to, and above cr = 8 instead of repeating a
// single channels(8) configuration. Every count stays below the stride of 43.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
10941
// multipixel_with_qmin: width(3) runs for channel counts 1, 8, ..., 36
// with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10954
// multipixel_with_qmax: width(3) runs for channel counts 1, 8, ..., 36
// with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10967
// input_offset: channel counts 16, 40, 64, 88, 112 with input_offset(176) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
10979
// zero: for every zero_index 0..24 (one per kr = 25 position), sweeps channel
// counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_tap = 0; zero_tap < 25; ++zero_tap) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_tap)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10994 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10995
10996
10997 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// c_eq_8: a single tester run where channels equals cr (both 8), kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
11006
// c_div_8: sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
11017
// c_div_8_with_qmin: channel counts 16, 40, 64, 88, 112 with qmin(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
11029
// c_div_8_with_qmax: channel counts 16, 40, 64, 88, 112 with qmax(128) set.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
11041
// c_lt_8: every channel count from 1 through 7, i.e. below cr = 8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
11052
// c_gt_8: every channel count from 9 through 15, i.e. between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
11063
// Channel counts 9 through 15 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11075
// Channel counts 9 through 15 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11087
// Width of 3 pixels, swept over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11099
// Width of 3 pixels; for each channel count in 1, 8, 15, 22, 29, 36 every
// step value from 2 up to kr = 25 is tried.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11114
// Width of 5 pixels with an output stride of 43, swept over channel counts
// 1, 8, 15, 22, 29 and 36.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(8) left it unused, so all six iterations ran the same configuration.
// The stride (43) is larger than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11127
// Width of 3 pixels and qmin set to 128, swept over channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11140
// Width of 3 pixels and qmax set to 128, swept over channel counts
// 1, 8, 15, 22, 29 and 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11153
// Input offset of 176, swept over channel counts 16, 40, 64, 88 and 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11165
// For each zero_index 0..24 (one per kr = 25 position), runs channel counts
// 16, 40, 64, 88 and 112 with an input offset of 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11180 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11181
11182
11183 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
11192
// Channel counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11203
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11215
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11227
// Every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11238
// Every channel count strictly between cr = 16 and 2 * cr, i.e. 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11249
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11261
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11273
// Width of 3 pixels, swept over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11285
// Width of 3 pixels; for each channel count in 1, 16, 31, 46, 61, 76 every
// step value from 2 up to kr = 9 is tried.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11300
// Width of 5 pixels with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(16) left it unused, so all six iterations ran the same configuration.
// The stride (83) is larger than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
11313
// Width of 3 pixels and qmin set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11326
// Width of 3 pixels and qmax set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11339
// Input offset of 304, swept over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11351
// For each zero_index 0..8 (one per kr = 9 position), runs channel counts
// 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11366 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11367
11368
11369 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
11378
// Channel counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11389
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11401
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11413
// Every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11424
// Every channel count strictly between cr = 16 and 2 * cr, i.e. 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11435
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11447
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11459
// Width of 3 pixels, swept over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11471
// Width of 3 pixels; for each channel count in 1, 16, 31, 46, 61, 76 every
// step value from 2 up to kr = 9 is tried.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11486
// Width of 5 pixels with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(16) left it unused, so all six iterations ran the same configuration.
// The stride (83) is larger than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
11499
// Width of 3 pixels and qmin set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11512
// Width of 3 pixels and qmax set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11525
// Input offset of 304, swept over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
11537
// For each zero_index 0..8 (one per kr = 9 position), runs channel counts
// 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11552 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11553
11554
11555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11564
// Channel counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11575
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11587
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11599
// Every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11610
// Every channel count strictly between cr = 16 and 2 * cr, i.e. 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11621
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11633
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11645
// Width of 3 pixels, swept over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11657
// Width of 3 pixels; for each channel count in 1, 16, 31, 46, 61, 76 every
// step value from 2 up to kr = 9 is tried.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11672
// Width of 5 pixels with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
// The loop variable is forwarded to channels(); the earlier hard-coded
// channels(16) left it unused, so all six iterations ran the same configuration.
// The stride (83) is larger than every channel count in the sweep.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11685
// Width of 3 pixels and qmin set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11698
// Width of 3 pixels and qmax set to 128, swept over channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11711
// Input offset of 304, swept over channel counts 32, 80, 128, 176 and 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11723
// For each zero_index 0..8 (one per kr = 9 position), runs channel counts
// 32, 80, 128, 176 and 224 with an input offset of 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11738 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11739
11740
11741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr = 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
11750
// Channel counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11761
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11773
// Channel counts 32, 80, 128, 176 and 224 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11785
// Every channel count below cr = 16, i.e. 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11796
// Every channel count strictly between cr = 16 and 2 * cr, i.e. 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11807
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11819
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11831
// Width of 3 pixels, swept over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
11843
// For channel counts 1, 16, 31, 46, 61 and 76, runs the tester with width(3)
// and every step value from 2 through 9 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11858
// Runs the up16x9 SSE4.1 MUL16_ADD16 microkernel with width(5) and
// output_stride(83) at channel counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(9)).
//
// The loop variable is forwarded to channels() so each iteration covers a
// different channel count; output_stride(83) is larger than the biggest count
// in the sweep (76), so the stride always exceeds the channel count.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
11871
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(9)) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11884
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(9)) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11897
// Runs the tester with input_offset(304) at channel counts 32, 80, 128, 176
// and 224 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11909
// For every zero_index from 0 through 8, runs the tester with
// input_offset(304) at channel counts 32, 80, 128, 176 and 224 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
11924 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11925
11926
11927 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x9 SSE4.1 MUL32 microkernel with channels(16), cr(16)
// and kr(9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
11936
// Runs the up16x9 SSE4.1 MUL32 microkernel at channel counts 32, 80, 128, 176
// and 224 (all multiples of 16) with cr(16) and kr(9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11947
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(9) and qmin(128) set
// on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11959
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(9) and qmax(128) set
// on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11971
// Runs the up16x9 SSE4.1 MUL32 microkernel once for every channel count from
// 1 through 15, with cr(16) and kr(9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11982
// Runs the up16x9 SSE4.1 MUL32 microkernel once for every channel count from
// 17 through 31, with cr(16) and kr(9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
11993
// Channel counts 17 through 31 with cr(16), kr(9) and qmin(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12005
// Channel counts 17 through 31 with cr(16), kr(9) and qmax(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12017
// Runs the up16x9 SSE4.1 MUL32 microkernel with width(3) at channel counts
// 1, 16, 31, 46, 61 and 76 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12029
// For channel counts 1, 16, 31, 46, 61 and 76, runs the tester with width(3)
// and every step value from 2 through 9 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12044
// Runs the up16x9 SSE4.1 MUL32 microkernel with width(5) and
// output_stride(83) at channel counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(9)).
//
// The loop variable is forwarded to channels() so each iteration covers a
// different channel count; output_stride(83) is larger than the biggest count
// in the sweep (76), so the stride always exceeds the channel count.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12057
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(9)) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12070
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(9)) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12083
// Runs the tester with input_offset(304) at channel counts 32, 80, 128, 176
// and 224 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12095
// For every zero_index from 0 through 8, runs the tester with
// input_offset(304) at channel counts 32, 80, 128, 176 and 224 (cr(16), kr(9)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12110 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12111
12112
12113 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 SSE2 MUL16 microkernel with channels(16), cr(16)
// and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
12122
// Runs the up16x25 SSE2 MUL16 microkernel at channel counts 32, 80, 128, 176
// and 224 (all multiples of 16) with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12133
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmin(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12145
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmax(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12157
// Runs the up16x25 SSE2 MUL16 microkernel once for every channel count from
// 1 through 15, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12168
// Runs the up16x25 SSE2 MUL16 microkernel once for every channel count from
// 17 through 31, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12179
// Channel counts 17 through 31 with cr(16), kr(25) and qmin(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12191
// Channel counts 17 through 31 with cr(16), kr(25) and qmax(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12203
// Runs the up16x25 SSE2 MUL16 microkernel with width(3) at channel counts
// 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12215
// For channel counts 1, 16, 31, 46, 61 and 76, runs the tester with width(3)
// and every step value from 2 through 25 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12230
// Runs the up16x25 SSE2 MUL16 microkernel with width(5) and
// output_stride(83) at channel counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
//
// The loop variable is forwarded to channels() so each iteration covers a
// different channel count; output_stride(83) is larger than the biggest count
// in the sweep (76), so the stride always exceeds the channel count.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
12243
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(25)) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12256
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(25)) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12269
// Runs the tester with input_offset(304) at channel counts 32, 80, 128, 176
// and 224 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12281
// For every zero_index from 0 through 24, runs the tester with
// input_offset(304) at channel counts 32, 80, 128, 176 and 224 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12297
12298
12299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 SSE2 MUL16_ADD16 microkernel with channels(16),
// cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
12308
// Runs the up16x25 SSE2 MUL16_ADD16 microkernel at channel counts 32, 80, 128,
// 176 and 224 (all multiples of 16) with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12319
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmin(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12331
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmax(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12343
// Runs the up16x25 SSE2 MUL16_ADD16 microkernel once for every channel count
// from 1 through 15, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12354
// Runs the up16x25 SSE2 MUL16_ADD16 microkernel once for every channel count
// from 17 through 31, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12365
// Channel counts 17 through 31 with cr(16), kr(25) and qmin(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12377
// Channel counts 17 through 31 with cr(16), kr(25) and qmax(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12389
// Runs the up16x25 SSE2 MUL16_ADD16 microkernel with width(3) at channel
// counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12401
// For channel counts 1, 16, 31, 46, 61 and 76, runs the tester with width(3)
// and every step value from 2 through 25 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12416
// Runs the up16x25 SSE2 MUL16_ADD16 microkernel with width(5) and
// output_stride(83) at channel counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
//
// The loop variable is forwarded to channels() so each iteration covers a
// different channel count; output_stride(83) is larger than the biggest count
// in the sweep (76), so the stride always exceeds the channel count.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
12429
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(25)) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12442
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(25)) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12455
// Runs the tester with input_offset(304) at channel counts 32, 80, 128, 176
// and 224 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
12467
// For every zero_index from 0 through 24, runs the tester with
// input_offset(304) at channel counts 32, 80, 128, 176 and 224 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12482 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12483
12484
12485 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 SSE4.1 MUL16 microkernel with channels(16), cr(16)
// and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
12494
// Runs the up16x25 SSE4.1 MUL16 microkernel at channel counts 32, 80, 128, 176
// and 224 (all multiples of 16) with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12505
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmin(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12517
// Channel counts 32, 80, 128, 176 and 224 with cr(16), kr(25) and qmax(128)
// set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12529
// Runs the up16x25 SSE4.1 MUL16 microkernel once for every channel count from
// 1 through 15, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12540
// Runs the up16x25 SSE4.1 MUL16 microkernel once for every channel count from
// 17 through 31, with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12551
// Channel counts 17 through 31 with cr(16), kr(25) and qmin(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12563
// Channel counts 17 through 31 with cr(16), kr(25) and qmax(128) set on the
// tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12575
// Runs the up16x25 SSE4.1 MUL16 microkernel with width(3) at channel counts
// 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12587
// For channel counts 1, 16, 31, 46, 61 and 76, runs the tester with width(3)
// and every step value from 2 through 25 (cr(16), kr(25)).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12602
// Runs the up16x25 SSE4.1 MUL16 microkernel with width(5) and
// output_stride(83) at channel counts 1, 16, 31, 46, 61 and 76 (cr(16), kr(25)).
//
// The loop variable is forwarded to channels() so each iteration covers a
// different channel count; output_stride(83) is larger than the biggest count
// in the sweep (76), so the stride always exceeds the channel count.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12615
// Multi-pixel sweep (width(3); channel counts 1, 16, 31, 46, 61, 76; cr(16),
// kr(25)) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
12628
// Runs the tester with width(3) and qmax(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12641
// Runs the tester with input_offset(304) for channel counts
// 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12653
// For every zero_index() value in [0, 25), runs the tester with
// input_offset(304) for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12668 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12669
12670
12671 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run in which channels(16) matches the value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12680
// Runs the tester for channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12691
// Runs the tester with qmin(128) for channel counts 32, 80, 128, 176, 224
// (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12703
// Runs the tester with qmax(128) for channel counts 32, 80, 128, 176, 224
// (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12715
// Runs the tester for every channel count from 1 through 15 (below the cr() value).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12726
// Runs the tester for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12737
// Runs the tester with qmin(128) for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12749
// Runs the tester with qmax(128) for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12761
// Runs the tester with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12773
// Runs the tester with width(3) for channel counts 1, 16, ..., 76, pairing
// each with every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t input_step = 2; input_step <= 25; ++input_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(input_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12788
// Runs the tester with width(5) and output_stride(83) for channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (every iteration passed channels(16)),
// so the loop executed six identical runs. The swept value is now forwarded to
// channels(); the largest value, 76, stays below the output_stride() of 83.
// NOTE: the file header states this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12801
// Runs the tester with width(3) and qmin(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12814
// Runs the tester with width(3) and qmax(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12827
// Runs the tester with input_offset(304) for channel counts
// 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12839
// For every zero_index() value in [0, 25), runs the tester with
// input_offset(304) for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12854 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12855
12856
12857 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run in which channels(16) matches the value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
12866
// Runs the tester for channel counts 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12877
// Runs the tester with qmin(128) for channel counts 32, 80, 128, 176, 224
// (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12889
// Runs the tester with qmax(128) for channel counts 32, 80, 128, 176, 224
// (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12901
// Runs the tester for every channel count from 1 through 15 (below the cr() value).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12912
// Runs the tester for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12923
// Runs the tester with qmin(128) for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12935
// Runs the tester with qmax(128) for every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12947
// Runs the tester with width(3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
12959
// Runs the tester with width(3) for channel counts 1, 16, ..., 76, pairing
// each with every step() value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t input_step = 2; input_step <= 25; ++input_step) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).step(input_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12974
// Runs the tester with width(5) and output_stride(83) for channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (every iteration passed channels(16)),
// so the loop executed six identical runs. The swept value is now forwarded to
// channels(); the largest value, 76, stays below the output_stride() of 83.
// NOTE: the file header states this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
12987
// Runs the tester with width(3) and qmin(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13000
// Runs the tester with width(3) and qmax(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13013
// Runs the tester with input_offset(304) for channel counts
// 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13025
// For every zero_index() value in [0, 25), runs the tester with
// input_offset(304) for channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).input_offset(304).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13041
13042
13043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run in which channels(24) matches the value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
13052
// Runs the tester for channel counts 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13063
// Runs the tester with qmin(128) for channel counts 48, 120, 192, 264, 336
// (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13075
// Runs the tester with qmax(128) for channel counts 48, 120, 192, 264, 336
// (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13087
// Runs the tester for every channel count from 1 through 23 (below the cr() value).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13098
// Runs the tester for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13109
// Runs the tester with qmin(128) for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13121
// Runs the tester with qmax(128) for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13133
// Runs the tester with width(3) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13145
// Runs the tester with width(3) for channel counts 1, 24, ..., 116, pairing
// each with every step() value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t input_step = 2; input_step <= 9; ++input_step) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).step(input_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13160
// Runs the tester with width(5) and output_stride(127) for channel counts
// 1, 24, 47, 70, 93, 116.
//
// The loop variable used to be ignored (every iteration passed channels(24)),
// so the loop executed six identical runs. The swept value is now forwarded to
// channels(); the largest value, 116, stays below the output_stride() of 127.
// NOTE: the file header states this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13173
// Runs the tester with width(3) and qmin(128) for channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13186
// Runs the tester with width(3) and qmax(128) for channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13199
// Runs the tester with input_offset(464) for channel counts
// 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13211
// For every zero_index() value in [0, 9), runs the tester with
// input_offset(464) for channel counts 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13226 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13227
13228
13229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run in which channels(24) matches the value passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13238
// Runs the tester for channel counts 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13249
// Runs the tester with qmin(128) for channel counts 48, 120, 192, 264, 336
// (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13261
// Runs the tester with qmax(128) for channel counts 48, 120, 192, 264, 336
// (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13273
// Runs the tester for every channel count from 1 through 23 (below the cr() value).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13284
// Runs the tester for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13295
// Runs the tester with qmin(128) for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13307
// Runs the tester with qmax(128) for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13319
// Runs the tester with width(3) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13331
// Runs the tester with width(3) for channel counts 1, 24, ..., 116, pairing
// each with every step() value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t input_step = 2; input_step <= 9; ++input_step) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).step(input_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13346
// Runs the tester with width(5) and output_stride(127) for channel counts
// 1, 24, 47, 70, 93, 116.
//
// The loop variable used to be ignored (every iteration passed channels(24)),
// so the loop executed six identical runs. The swept value is now forwarded to
// channels(); the largest value, 116, stays below the output_stride() of 127.
// NOTE: the file header states this file is generated by
// tools/generate-dwconv-test.py, so the generator template needs the same
// change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13359
// Runs the tester with width(3) and qmin(128) for channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13372
// Runs the tester with width(3) and qmax(128) for channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13385
// Runs the tester with input_offset(464) for channel counts
// 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13397
// For every zero_index in [0, 9), runs with input_offset = 464 for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester =
          DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13412 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13413
13414
13415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 24, the same value passed to cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13424
// Runs for channels = 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13435
// Runs with qmin = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13447
// Runs with qmax = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13459
// Runs for every channel count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13470
// Runs for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13481
// Runs with qmin = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13493
// Runs with qmax = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13505
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13517
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, each combined with every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13532
// Runs the microkernel over 5 output pixels with output_stride = 127 for
// channels = 1, 24, 47, 70, 93, 116. Each iteration passes the loop's channel
// count to the tester, so the iterations cover different configurations; the
// largest count (116) is below the output stride (127).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13545
// Width-3 runs with qmin = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13558
// Width-3 runs with qmax = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13571
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13583
// For every zero_index in [0, 9), runs with input_offset = 464 for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester =
          DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13598 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13599
13600
13601 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 24, the same value passed to cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
13610
// Runs for channels = 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13621
// Runs with qmin = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13633
// Runs with qmax = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13645
// Runs for every channel count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 24; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13656
// Runs for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13667
// Runs with qmin = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13679
// Runs with qmax = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13691
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13703
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13718
// Runs the microkernel over 5 output pixels with output_stride = 127 for
// channels = 1, 24, 47, 70, 93, 116. Each iteration passes the loop's channel
// count to the tester, so the iterations cover different configurations; the
// largest count (116) is below the output stride (127).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
13731
// Width-3 runs with qmin = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13744
// Width-3 runs with qmax = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13757
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
13769
// For every zero_index in [0, 25), runs with input_offset = 464 for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester =
          DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse2_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13784 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13785
13786
13787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 24, the same value passed to cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13796
// Runs for channels = 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13807
// Runs with qmin = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13819
// Runs with qmax = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13831
// Runs for every channel count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13842
// Runs for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13853
// Runs with qmin = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13865
// Runs with qmax = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13877
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13889
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13904
// Runs the microkernel over 5 output pixels with output_stride = 127 for
// channels = 1, 24, 47, 70, 93, 116. Each iteration passes the loop's channel
// count to the tester, so the iterations cover different configurations; the
// largest count (116) is below the output stride (127).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
13917
// Width-3 runs with qmin = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13930
// Width-3 runs with qmax = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13943
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13955
// For every zero_index in [0, 25), runs with input_offset = 464 for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester =
          DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13970 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13971
13972
13973 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 24, the same value passed to cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
13982
// Runs for channels = 48, 120, 192, 264, 336 (all multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
13993
// Runs with qmin = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14005
// Runs with qmax = 128 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14017
// Runs for every channel count from 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 24; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14028
// Runs for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14039
// Runs with qmin = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14051
// Runs with qmax = 128 for every channel count from 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14063
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14075
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14090
// Runs the microkernel over 5 output pixels with output_stride = 127 for
// channels = 1, 24, 47, 70, 93, 116. Each iteration passes the loop's channel
// count to the tester, so the iterations cover different configurations; the
// largest count (116) is below the output stride (127).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14103
// Width-3 runs with qmin = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14116
// Width-3 runs with qmax = 128 for channels = 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14129
// Runs with input_offset = 464 for channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14141
// For every zero_index in [0, 25), runs with input_offset = 464 for
// channels = 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      auto tester =
          DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__sse41_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14156 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14157
14158
14159 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels = 8, the same value passed to cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
14168
// Runs for channels = 16, 40, 64, 88, 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14179
// Runs with qmin = 128 for channels = 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
14191
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14203
// Sweeps channel counts 1..7, every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14214
// Sweeps channel counts 9..15: above cr (8) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14225
// Sweeps channel counts 9..15 (above cr = 8) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14237
// Sweeps channel counts 9..15 (above cr = 8) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14249
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14261
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36, and for each
// of them every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14276
// Runs with width(5) and output_stride(43) over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is forwarded to channels();
// the earlier hard-coded channels(8) made all six iterations repeat one
// identical configuration. Every swept count stays below the stride of 43.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// is not overwritten on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14289
// Runs with width(3) and qmin(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14302
// Runs with width(3) and qmax(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14315
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176)
// configured on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14327
// For every zero_index 0..8 (kr is 9), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14342 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14343
14344
14345 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
14354
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14365
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14377
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14389
// Sweeps channel counts 1..7, every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14400
// Sweeps channel counts 9..15: above cr (8) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14411
// Sweeps channel counts 9..15 (above cr = 8) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14423
// Sweeps channel counts 9..15 (above cr = 8) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14435
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14447
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36, and for each
// of them every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14462
// Runs with width(5) and output_stride(43) over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is forwarded to channels();
// the earlier hard-coded channels(8) made all six iterations repeat one
// identical configuration. Every swept count stays below the stride of 43.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// is not overwritten on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14475
// Runs with width(3) and qmin(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14488
// Runs with width(3) and qmax(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14501
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176)
// configured on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14513
// For every zero_index 0..8 (kr is 9), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14528 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14529
14530
14531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
14540
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14551
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14563
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14575
// Sweeps channel counts 1..7, every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14586
// Sweeps channel counts 9..15: above cr (8) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14597
// Sweeps channel counts 9..15 (above cr = 8) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14609
// Sweeps channel counts 9..15 (above cr = 8) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14621
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14633
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36, and for each
// of them every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14648
// Runs with width(5) and output_stride(43) over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is forwarded to channels();
// the earlier hard-coded channels(8) made all six iterations repeat one
// identical configuration. Every swept count stays below the stride of 43.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// is not overwritten on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14661
// Runs with width(3) and qmin(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14674
// Runs with width(3) and qmax(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14687
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176)
// configured on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14699
// For every zero_index 0..8 (kr is 9), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
14714 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14715
14716
14717 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
14726
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14737
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14749
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14761
// Sweeps channel counts 1..7, every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14772
// Sweeps channel counts 9..15: above cr (8) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14783
// Sweeps channel counts 9..15 (above cr = 8) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14795
// Sweeps channel counts 9..15 (above cr = 8) with qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14807
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14819
// Runs with width(3) over channel counts 1, 8, 15, 22, 29, 36, and for each
// of them every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
14834
// Runs with width(5) and output_stride(43) over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is forwarded to channels();
// the earlier hard-coded channels(8) made all six iterations repeat one
// identical configuration. Every swept count stays below the stride of 43.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// is not overwritten on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14847
// Runs with width(3) and qmin(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14860
// Runs with width(3) and qmax(128) over channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14873
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176)
// configured on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
14885
// For every zero_index 0..8 (kr is 9), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14901
14902
14903 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel with channels equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
14912
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14923
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14935
// Sweeps channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with
// qmax(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14947
// Sweeps channel counts 1..7, every value below cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14958
// Sweeps channel counts 9..15: above cr (8) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14969
// Sweeps channel counts 9..15 (above cr = 8) with qmin(128) set on the tester.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14981
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
14993
// Channel counts 1, 8, 15, 22, 29, 36 with the tester's width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15005
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, repeated for every step from 2 through 9 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15020
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations exercised one identical configuration.
// The largest channel count used (36) stays below the output stride (43).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15033
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15046
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15059
// Channel counts 16, 40, 64, 88, 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15071
// For every zero_index 0 through 8 (one per kr = 9), runs channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15086 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15087
15088
15089 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
15098
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15109
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15121
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15133
// Channel counts 1 through 7, i.e. fewer than cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15144
// Channel counts 9 through 15, i.e. more than cr (8) but fewer than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15155
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15167
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15179
// Channel counts 1, 8, 15, 22, 29, 36 with the tester's width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15191
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, repeated for every step from 2 through 9 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15206
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations exercised one identical configuration.
// The largest channel count used (36) stays below the output stride (43).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15219
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15232
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15245
// Channel counts 16, 40, 64, 88, 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15257
// For every zero_index 0 through 8 (one per kr = 9), runs channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15272 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15273
15274
15275 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
15284
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15295
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15307
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15319
// Channel counts 1 through 7, i.e. fewer than cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15330
// Channel counts 9 through 15, i.e. more than cr (8) but fewer than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15341
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15353
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15365
// Channel counts 1, 8, 15, 22, 29, 36 with the tester's width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15377
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, repeated for every step from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15392
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations exercised one identical configuration.
// The largest channel count used (36) stays below the output stride (43).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15405
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15418
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15431
// Channel counts 16, 40, 64, 88, 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15443
// For every zero_index 0 through 24 (one per kr = 25), runs channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15458 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15459
15460
15461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
15470
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15481
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15493
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15505
// Channel counts 1 through 7, i.e. fewer than cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15516
// Channel counts 9 through 15, i.e. more than cr (8) but fewer than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15527
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15539
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15551
// Channel counts 1, 8, 15, 22, 29, 36 with the tester's width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15563
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, repeated for every step from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15578
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations exercised one identical configuration.
// The largest channel count used (36) stays below the output stride (43).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15591
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15604
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15617
// Channel counts 16, 40, 64, 88, 112 with the tester's input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15629
// For every zero_index 0 through 24 (one per kr = 25), runs channel counts
// 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zi)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15644 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15645
15646
15647 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8)
    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
15656
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15667
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15679
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15691
// Channel counts 1 through 7, i.e. fewer than cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15702
// Channel counts 9 through 15, i.e. more than cr (8) but fewer than 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15713
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15725
// Channel counts 9 through 15 (more than cr = 8) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15737
// Channel counts 1, 8, 15, 22, 29, 36 with the tester's width set to 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15749
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36, repeated for every step from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
15764
// Width-5 runs with output_stride set to 43, swept over channel counts
// 1, 8, 15, 22, 29, 36.
// The loop variable is forwarded to channels(); previously channels(8) was
// hard-coded, so all six iterations exercised one identical configuration.
// The largest channel count used (36) stays below the output stride (43).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or it will be lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
15777
// Width 3 with qmin set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
15790
// Width 3 with qmax set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
15803
// input_offset(176) with channel counts 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
15815
// Every zero_index 0..24 combined with input_offset(176) and channel counts
// 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15830 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15831
15832
15833 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
15842
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15853
// Multiples of 8 (16, 40, 64, 88, 112) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15865
// Multiples of 8 (16, 40, 64, 88, 112) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15877
// Channel counts 1..7, all below cr=8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15888
// Channel counts 9..15 (above cr=8, below 2*cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15899
// Channel counts 9..15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15911
// Channel counts 9..15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15923
// Width 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15935
// Width 3; crosses channel counts 1, 8, ..., 36 with every step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15950
// Width 5 with output_stride(43), swept over channel counts 1, 8, ..., 36
// (all smaller than the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) made all six iterations run the identical configuration.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
15963
// Width 3 with qmin set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15976
// Width 3 with qmax set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
15989
// input_offset(176) with channel counts 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
16001
// Every zero_index 0..24 combined with input_offset(176) and channel counts
// 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16016 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16017
16018
16019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
16028
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16039
// Multiples of 8 (16, 40, 64, 88, 112) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16051
// Multiples of 8 (16, 40, 64, 88, 112) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16063
// Channel counts 1..7, all below cr=8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16074
// Channel counts 9..15 (above cr=8, below 2*cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16085
// Channel counts 9..15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16097
// Channel counts 9..15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16109
// Width 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16121
// Width 3; crosses channel counts 1, 8, ..., 36 with every step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16136
// Width 5 with output_stride(43), swept over channel counts 1, 8, ..., 36
// (all smaller than the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) made all six iterations run the identical configuration.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16149
// Width 3 with qmin set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16162
// Width 3 with qmax set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16175
// input_offset(176) with channel counts 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16187
// Every zero_index 0..24 combined with input_offset(176) and channel counts
// 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16202 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16203
16204
16205 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
16214
// Channel counts that are multiples of 8: 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16225
// Multiples of 8 (16, 40, 64, 88, 112) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16237
// Multiples of 8 (16, 40, 64, 88, 112) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16249
// Channel counts 1..7, all below cr=8.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16260
// Channel counts 9..15 (above cr=8, below 2*cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16271
// Channel counts 9..15 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16283
// Channel counts 9..15 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16295
// Width 3 with channel counts 1, 8, 15, 22, 29, 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16307
// Width 3; crosses channel counts 1, 8, ..., 36 with every step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16322
// Width 5 with output_stride(43), swept over channel counts 1, 8, ..., 36
// (all smaller than the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(8) made all six iterations run the identical configuration.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16335
// Width 3 with qmin set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16348
// Width 3 with qmax set to 128, over channel counts 1, 8, ..., 36.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16361
// input_offset(176) with channel counts 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(num_channels)
        .input_offset(176)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16373
// Every zero_index 0..24 combined with input_offset(176) and channel counts
// 16, 40, 64, 88, 112.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(num_channels)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16388 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16389
16390
16391 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Channel count equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester()
      .cr(16).kr(3)
      .channels(16)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
16400
// Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16411
// Multiples of 16 (32, 80, 128, 176, 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16423
// Multiples of 16 (32, 80, 128, 176, 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16435
// Channel counts 1..15, all below cr=16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16446
// Channel counts 17..31 (above cr=16, below 2*cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16457
// Channel counts 17..31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16469
// Channel counts 17..31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16481
// Width 3 with channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16493
// Width 3; crosses channel counts 1, 16, ..., 76 with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 3; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(16).kr(3)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16508
// Width 5 with output_stride(83), swept over channel counts 1, 16, ..., 76
// (all smaller than the stride).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made all six iterations run the identical configuration.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
16521
// Width 3 with qmin set to 128, over channel counts 1, 16, ..., 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16534
// Width 3 with qmax set to 128, over channel counts 1, 16, ..., 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16547
// input_offset(304) with channel counts 32, 80, 128, 176, 224.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(3)
        .channels(num_channels)
        .input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
16559
// For each zero_index in [0, kr), sweeps channels 32, 80, 128, 176, 224 with
// input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t zi = 0; zi < kKr; ++zi) {
    for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16574 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16575
16576
16577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(kCr).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
16586
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16597
// Sweeps channels over 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16609
// Sweeps channels over 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16621
// Covers every channel count below cr: 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 1; c < kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16632
// Covers channel counts 17 through 31 (above cr, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16643
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16655
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16667
// Width-3 runs over channels 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16679
// Width-3 runs over channels 1, 16, 31, 46, 61, 76, each with step values
// 2 through kr (3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    for (size_t s = 2; s <= kKr; ++s) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16694
// Width-5 runs with output_stride 83 over channels 1, 16, 31, 46, 61, 76.
// The loop variable is now passed to channels(); previously a constant 16 was
// passed, so every iteration repeated the same configuration. The largest
// channel count (76) stays below the output stride (83).
// NOTE(review): this file is marked auto-generated; the same change probably
// belongs in tools/generate-dwconv-test.py - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(5).output_stride(83).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16707
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16720
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16733
// Channels 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
16745
// For each zero_index in [0, kr), sweeps channels 32, 80, 128, 176, 224 with
// input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t zi = 0; zi < kKr; ++zi) {
    for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16760 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16761
16762
16763 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(kCr).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16772
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16783
// Sweeps channels over 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16795
// Sweeps channels over 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16807
// Covers every channel count below cr: 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 1; c < kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16818
// Covers channel counts 17 through 31 (above cr, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16829
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16841
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16853
// Width-3 runs over channels 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16865
// Width-3 runs over channels 1, 16, 31, 46, 61, 76, each with step values
// 2 through kr (3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    for (size_t s = 2; s <= kKr; ++s) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16880
// Width-5 runs with output_stride 83 over channels 1, 16, 31, 46, 61, 76.
// The loop variable is now passed to channels(); previously a constant 16 was
// passed, so every iteration repeated the same configuration. The largest
// channel count (76) stays below the output stride (83).
// NOTE(review): this file is marked auto-generated; the same change probably
// belongs in tools/generate-dwconv-test.py - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(5).output_stride(83).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16893
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16906
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16919
// Channels 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16931
// For each zero_index in [0, kr), sweeps channels 32, 80, 128, 176, 224 with
// input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 3;
  for (uint32_t zi = 0; zi < kKr; ++zi) {
    for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16946 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16947
16948
16949 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(kCr).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16958
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16969
// Sweeps channels over 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16981
// Sweeps channels over 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16993
// Covers every channel count below cr: 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 1; c < kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17004
// Covers channel counts 17 through 31 (above cr, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17015
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17027
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17039
// Width-3 runs over channels 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17051
// Width-3 runs over channels 1, 16, 31, 46, 61, 76, each with step values
// 2 through kr (9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    for (size_t s = 2; s <= kKr; ++s) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17066
// Width-5 runs with output_stride 83 over channels 1, 16, 31, 46, 61, 76.
// The loop variable is now passed to channels(); previously a constant 16 was
// passed, so every iteration repeated the same configuration. The largest
// channel count (76) stays below the output stride (83).
// NOTE(review): this file is marked auto-generated; the same change probably
// belongs in tools/generate-dwconv-test.py - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(5).output_stride(83).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17079
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17092
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17105
// Channels 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17117
// For each zero_index in [0, kr), sweeps channels 32, 80, 128, 176, 224 with
// input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t zi = 0; zi < kKr; ++zi) {
    for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17132 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17133
17134
17135 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(kCr).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17144
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17155
// Sweeps channels over 32, 80, 128, 176, 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17167
// Sweeps channels over 32, 80, 128, 176, 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17179
// Covers every channel count below cr: 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 1; c < kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17190
// Covers channel counts 17 through 31 (above cr, below 2 * cr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17201
// Covers channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17213
// Covers channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = kCr + 1; c < 2 * kCr; ++c) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17225
// Width-3 runs over channels 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17237
// Width-3 runs over channels 1, 16, 31, 46, 61, 76, each with step values
// 2 through kr (9).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    for (size_t s = 2; s <= kKr; ++s) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17252
// Width-5 runs with output_stride 83 over channels 1, 16, 31, 46, 61, 76.
// The loop variable is now passed to channels(); previously a constant 16 was
// passed, so every iteration repeated the same configuration. The largest
// channel count (76) stays below the output stride (83).
// NOTE(review): this file is marked auto-generated; the same change probably
// belongs in tools/generate-dwconv-test.py - confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(5).output_stride(83).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17265
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17278
// Width-3 runs over channels 1, 16, 31, 46, 61, 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (size_t c = 1; c <= 5 * kCr; c += kCr - 1) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17291
// Channels 32, 80, 128, 176, 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17303
// For each zero_index in [0, kr), sweeps channels 32, 80, 128, 176, 224 with
// input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t zi = 0; zi < kKr; ++zi) {
    for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
      DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17318 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17319
17320
17321 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(kCr).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17330
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr uint32_t kCr = 16;
  constexpr uint32_t kKr = 9;
  for (uint32_t c = 2 * kCr; c < 16 * kCr; c += 3 * kCr) {
    DWConvMicrokernelTester().cr(kCr).kr(kKr).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17341
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17353
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17365
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17376
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17387
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17399
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17411
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17423
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17438
// Width-5 runs with an explicit output_stride(83) over channel counts
// 1, 16, 31, 46, 61 and 76 (all smaller than the stride).
// The channel count must follow the loop variable: with a fixed count the
// loop would merely repeat one identical configuration six times.
// NOTE: this file is auto-generated; the same change should be made in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
17451
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17464
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17477
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17489
// Every zero_index from 0 through 8 is combined with channel counts
// 32, 80, 128, 176 and 224, always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_slot = 0; zero_slot < 9; zero_slot++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17504 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17505
17506
17507 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17516
// Channel counts 32, 80, 128, 176 and 224 -- all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17527
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17539
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17551
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17562
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17573
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17585
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17597
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17609
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17624
// Width-5 runs with an explicit output_stride(83) over channel counts
// 1, 16, 31, 46, 61 and 76 (all smaller than the stride).
// The channel count must follow the loop variable: with a fixed count the
// loop would merely repeat one identical configuration six times.
// NOTE: this file is auto-generated; the same change should be made in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
17637
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17650
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17663
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17675
// Every zero_index from 0 through 8 is combined with channel counts
// 32, 80, 128, 176 and 224, always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot < 9; zero_slot++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17690 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17691
17692
17693 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17702
// Channel counts 32, 80, 128, 176 and 224 -- all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17713
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17725
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17737
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17748
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17759
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17771
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17783
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17795
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17810
// Width-5 runs with an explicit output_stride(83) over channel counts
// 1, 16, 31, 46, 61 and 76 (all smaller than the stride).
// The channel count must follow the loop variable: with a fixed count the
// loop would merely repeat one identical configuration six times.
// NOTE: this file is auto-generated; the same change should be made in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
17823
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17836
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17849
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17861
// Every zero_index from 0 through 8 is combined with channel counts
// 32, 80, 128, 176 and 224, always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot < 9; zero_slot++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17876 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17877
17878
17879 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
17888
// Channel counts 32, 80, 128, 176 and 224 -- all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17899
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17911
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17923
// Every channel count from 1 through 15, i.e. below the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17934
// Every channel count from 17 through 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17945
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17957
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17969
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
17981
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each repeated
// for step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17996
// Width-5 runs with an explicit output_stride(83) over channel counts
// 1, 16, 31, 46, 61 and 76 (all smaller than the stride).
// The channel count must follow the loop variable: with a fixed count the
// loop would merely repeat one identical configuration six times.
// NOTE: this file is auto-generated; the same change should be made in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18009
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18022
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18035
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304) applied.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18047
// Every zero_index from 0 through 8 is combined with channel counts
// 32, 80, 128, 176 and 224, always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_slot = 0; zero_slot < 9; zero_slot++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18062 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18063
18064
18065 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr(16) setting.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
18074
// Channel counts 32, 80, 128, 176 and 224 -- all multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18085
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18097
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
18109
// Runs every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18120
// Runs every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18131
// Runs every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18143
// Runs every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18155
// Width-3 runs with channels sweeping 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18167
// Width-3 runs: for each channel count in 1, 16, ..., 76, tries every step from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18182
// Width-5 runs with output_stride(83) while channels sweeps 1, 16, 31, 46, 61
// and 76. The loop variable is passed to channels() so that each iteration
// covers a different channel count, as the sibling multipixel tests do; a
// hard-coded channels(16) would repeat one configuration six times. Every
// swept value stays below the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18195
// Width-3 runs with qmin set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18208
// Width-3 runs with qmax set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18221
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
18233
// For each zero_index 0 through 8, sweeps channels over 32, 80, 128, 176 and
// 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18248 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18249
18250
18251 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with cr=16, kr=9 and channels fixed at 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18260
// Sweeps channels over 32, 80, 128, 176 and 224 (multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18271
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18283
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18295
// Runs every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18306
// Runs every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18317
// Runs every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18329
// Runs every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18341
// Width-3 runs with channels sweeping 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18353
// Width-3 runs: for each channel count in 1, 16, ..., 76, tries every step from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18368
// Width-5 runs with output_stride(83) while channels sweeps 1, 16, 31, 46, 61
// and 76. The loop variable is passed to channels() so that each iteration
// covers a different channel count, as the sibling multipixel tests do; a
// hard-coded channels(16) would repeat one configuration six times. Every
// swept value stays below the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18381
// Width-3 runs with qmin set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18394
// Width-3 runs with qmax set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18407
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18419
// For each zero_index 0 through 8, sweeps channels over 32, 80, 128, 176 and
// 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18434 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18435
18436
18437 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with cr=16, kr=9 and channels fixed at 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18446
// Sweeps channels over 32, 80, 128, 176 and 224 (multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18457
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18469
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18481
// Runs every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18492
// Runs every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18503
// Runs every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18515
// Runs every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18527
// Width-3 runs with channels sweeping 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18539
// Width-3 runs: for each channel count in 1, 16, ..., 76, tries every step from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18554
// Width-5 runs with output_stride(83) while channels sweeps 1, 16, 31, 46, 61
// and 76. The loop variable is passed to channels() so that each iteration
// covers a different channel count, as the sibling multipixel tests do; a
// hard-coded channels(16) would repeat one configuration six times. Every
// swept value stays below the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18567
// Width-3 runs with qmin set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18580
// Width-3 runs with qmax set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18593
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18605
// For each zero_index 0 through 8, sweeps channels over 32, 80, 128, 176 and
// 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18620 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18621
18622
18623 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with cr=16, kr=25 and channels fixed at 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18632
// Sweeps channels over 32, 80, 128, 176 and 224 (multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18643
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18655
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18667
// Runs every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18678
// Runs every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18689
// Runs every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18701
// Runs every channel count from 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18713
// Width-3 runs with channels sweeping 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18725
// Width-3 runs: for each channel count in 1, 16, ..., 76, tries every step from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18740
// Width-5 runs with output_stride(83) while channels sweeps 1, 16, 31, 46, 61
// and 76. The loop variable is passed to channels() so that each iteration
// covers a different channel count, as the sibling multipixel tests do; a
// hard-coded channels(16) would repeat one configuration six times. Every
// swept value stays below the output stride of 83.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18753
// Width-3 runs with qmin set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18766
// Width-3 runs with qmax set to 128; channels sweeps 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18779
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18791
// For each zero_index 0 through 24, sweeps channels over 32, 80, 128, 176 and
// 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18806 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18807
18808
18809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with cr=16, kr=25 and channels fixed at 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(16);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18818
// Sweeps channels over 32, 80, 128, 176 and 224 (multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18829
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18841
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18853
// Runs every channel count from 1 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18864
// Runs every channel count from 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18875
// Runs every channel count from 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18887
// Covers every channel count from 17 to 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18899
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18911
// For channel counts 1, 16, 31, 46, 61 and 76, tries every step value from 2 to 25
// at width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18926
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46, 61 and 76.
// channels() receives the loop variable so that each iteration covers a different
// channel count; a hard-coded channels(16) would repeat one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18939
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18952
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18965
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
18977
// For each zero_index from 0 to 24, sweeps channel counts 32, 80, 128, 176 and 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18992 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18993
18994
18995 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16) on a tester configured with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
19004
// Sweeps channel counts 32, 80, 128, 176 and 224, all of which are multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19015
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19027
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19039
// Covers every channel count from 1 to 15, i.e. all counts below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19050
// Covers every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19061
// Covers every channel count from 17 to 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19073
// Covers every channel count from 17 to 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19085
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19097
// For channel counts 1, 16, 31, 46, 61 and 76, tries every step value from 2 to 25
// at width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19112
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46, 61 and 76.
// channels() receives the loop variable so that each iteration covers a different
// channel count; a hard-coded channels(16) would repeat one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19125
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19138
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19151
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
19163
// For each zero_index from 0 to 24, sweeps channel counts 32, 80, 128, 176 and 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19178 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19179
19180
19181 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16) on a tester configured with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19190
// Sweeps channel counts 32, 80, 128, 176 and 224, all of which are multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19201
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19213
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19225
// Covers every channel count from 1 to 15, i.e. all counts below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19236
// Covers every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19247
// Covers every channel count from 17 to 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19259
// Covers every channel count from 17 to 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19271
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19283
// For channel counts 1, 16, 31, 46, 61 and 76, tries every step value from 2 to 25
// at width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19298
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46, 61 and 76.
// channels() receives the loop variable so that each iteration covers a different
// channel count; a hard-coded channels(16) would repeat one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19311
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19324
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19337
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19349
// For each zero_index from 0 to 24, sweeps channel counts 32, 80, 128, 176 and 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19364 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19365
19366
19367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16) on a tester configured with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19376
// Sweeps channel counts 32, 80, 128, 176 and 224, all of which are multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19387
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19399
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19411
// Covers every channel count from 1 to 15, i.e. all counts below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19422
// Covers every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19433
// Covers every channel count from 17 to 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19445
// Covers every channel count from 17 to 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19457
// Runs with width(3) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19469
// For channel counts 1, 16, 31, 46, 61 and 76, tries every step value from 2 to 25
// at width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19484
// Runs with width(5) and output_stride(83) for channel counts 1, 16, 31, 46, 61 and 76.
// channels() receives the loop variable so that each iteration covers a different
// channel count; a hard-coded channels(16) would repeat one configuration six times.
// Every swept count stays below the output stride of 83.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the generator
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19497
// Runs with width(3) and qmin(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19510
// Runs with width(3) and qmax(128) for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19523
// Sweeps channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19535
// For each zero_index from 0 to 24, sweeps channel counts 32, 80, 128, 176 and 224
// with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
19550 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19551
19552
19553 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16) on a tester configured with cr(16) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
    xnn_init_qc8_conv_minmax_fp32_avx2_params,
    xnn_qs8_requantize_fp32);
}
19562
// Sweeps channel counts 32, 80, 128, 176 and 224, all of which are multiples of 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19573
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19585
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19597
// Covers every channel count from 1 to 15, i.e. all counts below 16.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19608
// Covers every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19619
// Covers every channel count from 17 to 31 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19631
// Covers every channel count from 17 to 31 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
  }
}
19643
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19655
// Width-3 runs over channel counts 1, 16, ..., 76; for each channel count the
// tester's step is swept from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19670
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// Fix: the loop variable used to be ignored (.channels(16) was hard-coded), so
// the identical configuration ran six times. Each swept channel count is
// smaller than the output stride of 83.
// NOTE(review): the file header marks this file as auto-generated; the same
// change presumably belongs in tools/generate-dwconv-test.py -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19683
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19696
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19709
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19721
// For each zero_index in [0, 25) (one per kr position), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19737
19738
19739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
19748
// Channel counts 32, 80, 128, 176 and 224 -- each a multiple of cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19759
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19771
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19783
// Runs the kernel for every channel count from 1 through 15, i.e. all values
// strictly below cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19794
// Runs the kernel for every channel count from 17 through 31, i.e. values
// above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19805
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19817
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19829
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19841
// Width-3 runs over channel counts 1, 16, ..., 76; for each channel count the
// tester's step is swept from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19856
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// Fix: the loop variable used to be ignored (.channels(16) was hard-coded), so
// the identical configuration ran six times. Each swept channel count is
// smaller than the output stride of 83.
// NOTE(review): the file header marks this file as auto-generated; the same
// change presumably belongs in tools/generate-dwconv-test.py -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
19869
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19882
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19895
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
19907
// For each zero_index in [0, 25) (one per kr position), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19922 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19923
19924
19925 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19934
// Channel counts 32, 80, 128, 176 and 224 -- each a multiple of cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19945
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19957
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19969
// Runs the kernel for every channel count from 1 through 15, i.e. all values
// strictly below cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19980
// Runs the kernel for every channel count from 17 through 31, i.e. values
// above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19991
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20003
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20015
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20027
// Width-3 runs over channel counts 1, 16, ..., 76; for each channel count the
// tester's step is swept from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20042
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// Fix: the loop variable used to be ignored (.channels(16) was hard-coded), so
// the identical configuration ran six times. Each swept channel count is
// smaller than the output stride of 83.
// NOTE(review): the file header marks this file as auto-generated; the same
// change presumably belongs in tools/generate-dwconv-test.py -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20055
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20068
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20081
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20093
// For each zero_index in [0, 25) (one per kr position), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL16_ADD16, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20108 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20109
20110
20111 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20120
// Channel counts 32, 80, 128, 176 and 224 -- each a multiple of cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20131
// Channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20143
// Channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20155
// Runs the kernel for every channel count from 1 through 15, i.e. all values
// strictly below cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20166
// Runs the kernel for every channel count from 17 through 31, i.e. values
// above cr (16) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20177
// Channel counts 17 through 31 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20189
// Channel counts 17 through 31 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20201
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20213
// Width-3 runs over channel counts 1, 16, ..., 76; for each channel count the
// tester's step is swept from 2 through 25 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20228
// Width-5 runs with an explicit output stride of 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
//
// Fix: the loop variable used to be ignored (.channels(16) was hard-coded), so
// the identical configuration ran six times. Each swept channel count is
// smaller than the output stride of 83.
// NOTE(review): the file header marks this file as auto-generated; the same
// change presumably belongs in tools/generate-dwconv-test.py -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20241
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20254
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20267
// Channel counts 32, 80, 128, 176 and 224 with the tester's input_offset set
// to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20279
// For each zero_index in [0, 25) (one per kr position), runs channel counts
// 32, 80, 128, 176 and 224 with input_offset 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20294 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20295
20296
20297 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with channels equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20306
// Channel counts 48, 120, 192, 264 and 336 -- each a multiple of cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20317
// Channel counts 48, 120, 192, 264 and 336 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20329
// Channel counts 48, 120, 192, 264 and 336 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20341
// Runs the kernel for every channel count from 1 through 23, i.e. all values
// strictly below cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 24; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20352
// Runs the kernel for every channel count from 25 through 47, i.e. values
// above cr (24) and below 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20363
// Channel counts 25 through 47 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20375
// Channel counts 25 through 47 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20387
// Width-3 runs over channel counts 1, 24, 47, 70, 93 and 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20399
// Width-3 runs over channel counts 1, 24, ..., 116; for each channel count the
// tester's step is swept from 2 through 9 (kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20414
// Width-5 runs with an explicit output stride of 127, swept over channel
// counts 1, 24, 47, 70, 93 and 116.
//
// Fix: the loop variable used to be ignored (.channels(24) was hard-coded), so
// the identical configuration ran six times. Each swept channel count is
// smaller than the output stride of 127.
// NOTE(review): the file header marks this file as auto-generated; the same
// change presumably belongs in tools/generate-dwconv-test.py -- confirm.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20427
// width(3) runs with qmin(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20440
// width(3) runs with qmax(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20453
// input_offset(464) for channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20465
// Sweeps zero_index 0..8 over channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t n = 48; n < 384; n += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20480 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20481
20482
20483 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20492
// Channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20503
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20515
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20527
// Channel counts 1..23, all below cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n < 24; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20538
// Channel counts 25..47, all above cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20549
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20561
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20573
// width(3) runs for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20585
// width(3) runs for channel counts 1, 24, ..., 116, each with step values 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20600
// width(5) runs with output_stride(127) for channel counts 1, 24, 47, 70, 93, 116.
// The loop counter is forwarded to channels(); the previous code hard-coded
// channels(24), so all six iterations repeated one identical configuration.
// 127 is larger than every channel count visited (presumably what the tester
// requires of an output stride -- confirm against DWConvMicrokernelTester).
// NOTE: this file is generated; the same fix belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20613
// width(3) runs with qmin(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20626
// width(3) runs with qmax(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20639
// input_offset(464) for channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20651
// Sweeps zero_index 0..8 over channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t n = 48; n < 384; n += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20666 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20667
20668
20669 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
20678
// Channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20689
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20701
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20713
// Channel counts 1..23, all below cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n < 24; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20724
// Channel counts 25..47, all above cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20735
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20747
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20759
// width(3) runs for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20771
// width(3) runs for channel counts 1, 24, ..., 116, each with step values 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20786
// width(5) runs with output_stride(127) for channel counts 1, 24, 47, 70, 93, 116.
// The loop counter is forwarded to channels(); the previous code hard-coded
// channels(24), so all six iterations repeated one identical configuration.
// 127 is larger than every channel count visited (presumably what the tester
// requires of an output stride -- confirm against DWConvMicrokernelTester).
// NOTE: this file is generated; the same fix belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
20799
// width(3) runs with qmin(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20812
// width(3) runs with qmax(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20825
// input_offset(464) for channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
20837
// Sweeps zero_index 0..8 over channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t n = 48; n < 384; n += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20852 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20853
20854
20855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr = 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(9).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20864
// Channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20875
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20887
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20899
// Channel counts 1..23, all below cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n < 24; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20910
// Channel counts 25..47, all above cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20921
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20933
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20945
// width(3) runs for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20957
// width(3) runs for channel counts 1, 24, ..., 116, each with step values 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20972
// width(5) runs with output_stride(127) for channel counts 1, 24, 47, 70, 93, 116.
// The loop counter is forwarded to channels(); the previous code hard-coded
// channels(24), so all six iterations repeated one identical configuration.
// 127 is larger than every channel count visited (presumably what the tester
// requires of an output stride -- confirm against DWConvMicrokernelTester).
// NOTE: this file is generated; the same fix belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20985
// width(3) runs with qmin(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20998
// width(3) runs with qmax(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21011
// input_offset(464) for channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(9).channels(n).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21023
// Sweeps zero_index 0..8 over channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t n = 48; n < 384; n += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(9).channels(n).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21038 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21039
21040
21041 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(24), equal to cr(24); kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21050
// Channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21061
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21073
// Channel counts 48, 120, 192, 264, 336 (multiples of 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21085
// Channel counts 1..23, all below cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n < 24; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21096
// Channel counts 25..47, all above cr(24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21107
// Channel counts 25..47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21119
// Channel counts 25..47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 25; n < 48; ++n) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21131
// width(3) runs for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21143
// width(3) runs for channel counts 1, 24, ..., 116, each with step values 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(n).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21158
// width(5) runs with output_stride(127) for channel counts 1, 24, 47, 70, 93, 116.
// The loop counter is forwarded to channels(); the previous code hard-coded
// channels(24), so all six iterations repeated one identical configuration.
// 127 is larger than every channel count visited (presumably what the tester
// requires of an output stride -- confirm against DWConvMicrokernelTester).
// NOTE: this file is generated; the same fix belongs in the generator template.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21171
// width(3) runs with qmin(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21184
// width(3) runs with qmax(128) for channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21197
// input_offset(464) for channel counts 48, 120, 192, 264, 336 (multiples of 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 48; n < 384; n += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(n).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21209
// Sweeps zero_index 0..24 over channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t n = 48; n < 384; n += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(n).input_offset(464).zero_index(zi);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul16,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21224 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21225
21226
21227 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21236
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21247
// Multiples of 24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21259
// Multiples of 24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21271
// Every channel count below cr: 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21282
// Every channel count strictly between cr and 2*cr: 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21293
// Channels 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21305
// Channels 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21317
// width(3), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21329
// width(3) with step values 2 through 25, for channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21344
// width(5) with an explicit output_stride(127), swept over channels
// 1, 24, 47, 70, 93, 116.
// The loop variable is passed to channels(); the earlier body hard-coded
// channels(24), so all six iterations ran the same configuration.
// output_stride(127) is larger than the biggest swept channel count (116).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21357
// width(3) with qmin(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21370
// width(3) with qmax(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21383
// input_offset(464), swept over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21395
// For each zero_index in [0, 25): input_offset(464) over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21410 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21411
21412
21413 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21422
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21433
// Multiples of 24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21445
// Multiples of 24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21457
// Every channel count below cr: 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21468
// Every channel count strictly between cr and 2*cr: 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21479
// Channels 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21491
// Channels 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21503
// width(3), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21515
// width(3) with step values 2 through 25, for channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21530
// width(5) with an explicit output_stride(127), swept over channels
// 1, 24, 47, 70, 93, 116.
// The loop variable is passed to channels(); the earlier body hard-coded
// channels(24), so all six iterations ran the same configuration.
// output_stride(127) is larger than the biggest swept channel count (116).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
21543
// width(3) with qmin(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21556
// width(3) with qmax(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21569
// input_offset(464), swept over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21581
// For each zero_index in [0, 25): input_offset(464) over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21596 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21597
21598
21599 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_eq_24) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester;
  tester.cr(24).kr(25).channels(24);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21608
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21619
// Multiples of 24 (48, 120, 192, 264, 336) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21631
// Multiples of 24 (48, 120, 192, 264, 336) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21643
// Every channel count below cr: 1 through 23.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21654
// Every channel count strictly between cr and 2*cr: 25 through 47.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21665
// Channels 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21677
// Channels 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21689
// width(3), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21701
// width(3) with step values 2 through 25, for channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21716
// width(5) with an explicit output_stride(127), swept over channels
// 1, 24, 47, 70, 93, 116.
// The loop variable is passed to channels(); the earlier body hard-coded
// channels(24), so all six iterations ran the same configuration.
// output_stride(127) is larger than the biggest swept channel count (116).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21729
// width(3) with qmin(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21742
// width(3) with qmax(128), swept over channels 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21755
// input_offset(464), swept over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester tester;
    tester.cr(24).kr(25).channels(c).input_offset(464);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21767
// For each zero_index in [0, 25): input_offset(464) over channels 48, 120, 192, 264, 336.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester tester;
      tester.cr(24).kr(25).channels(c).input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__xop_mul32,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21782 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21783
21784
21785 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21794
// Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21805
// Multiples of 32 (64, 160, 256, 352, 448) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21817
// Multiples of 32 (64, 160, 256, 352, 448) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21829
// Every channel count below cr: 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21840
// Every channel count strictly between cr and 2*cr: 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21851
// Channels 33 through 63 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21863
// Channels 33 through 63 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21875
// width(3), swept over channels 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21887
// width(3) with step values 2 through 9, for channels 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21902
// width(5) with an explicit output_stride(163), swept over channels
// 1, 32, 63, 94, 125, 156.
// The loop variable is passed to channels(); the earlier body hard-coded
// channels(32), so all six iterations ran the same configuration.
// output_stride(163) is larger than the biggest swept channel count (156).
// NOTE: this file is generated by tools/generate-dwconv-test.py; the template
// needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
21915
// width(3) with qmin(128), swept over channels 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21928
// width(3) with qmax(128), swept over channels 1, 32, 63, 94, 125, 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21941
// input_offset(592), swept over channels 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21953
// For each zero_index in [0, 9): input_offset(592) over channels 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester;
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21968 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21969
21970
21971 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester;
  tester.cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
21980
// Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester;
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
21991
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22003
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22015
// Every channel count from 1 through 31, i.e. below cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22026
// Every channel count from 33 through 63, i.e. strictly between cr (32) and 64.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22037
// Channel counts 33 through 63 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22049
// Channel counts 33 through 63 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22061
// Channel counts 1, 32, 63, 94, 125, 156 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22073
// For channel counts 1, 32, ..., 156, tries every step from 2 through 9
// with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22088
// Multipixel run with width(5) and output_stride(163) over channel counts
// 1, 32, ..., 156.
// The channel count follows the loop variable, as in the sibling multipixel
// tests; it was previously pinned to 32, which left the loop variable unused
// and re-ran one configuration six times. The largest swept count (156) stays
// below the output stride (163).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22101
// Channel counts 1, 32, ..., 156 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22114
// Channel counts 1, 32, ..., 156 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22127
// Channel counts 64, 160, ..., 448 (multiples of 32) with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22139
// For every zero_index in [0, 9), sweeps channel counts 64, 160, ..., 448
// with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22154 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22155
22156
22157 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(32);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
              xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22166
// Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22177
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22189
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22201
// Every channel count from 1 through 31, i.e. below cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22212
// Every channel count from 33 through 63, i.e. strictly between cr (32) and 64.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22223
// Channel counts 33 through 63 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22235
// Channel counts 33 through 63 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22247
// Channel counts 1, 32, 63, 94, 125, 156 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22259
// For channel counts 1, 32, ..., 156, tries every step from 2 through 9
// with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22274
// Multipixel run with width(5) and output_stride(163) over channel counts
// 1, 32, ..., 156.
// The channel count follows the loop variable, as in the sibling multipixel
// tests; it was previously pinned to 32, which left the loop variable unused
// and re-ran one configuration six times. The largest swept count (156) stays
// below the output stride (163).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22287
// Channel counts 1, 32, ..., 156 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22300
// Channel counts 1, 32, ..., 156 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22313
// Channel counts 64, 160, ..., 448 (multiples of 32) with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22325
// For every zero_index in [0, 9), sweeps channel counts 64, 160, ..., 448
// with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22340 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22341
22342
22343 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(32);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22352
// Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22363
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22375
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22387
// Every channel count from 1 through 31, i.e. below cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22398
// Every channel count from 33 through 63, i.e. strictly between cr (32) and 64.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22409
// Channel counts 33 through 63 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22421
// Channel counts 33 through 63 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22433
// Channel counts 1, 32, 63, 94, 125, 156 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22445
// For channel counts 1, 32, ..., 156, tries every step from 2 through 9
// with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; s++) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22460
// Multipixel run with width(5) and output_stride(163) over channel counts
// 1, 32, ..., 156.
// The channel count follows the loop variable, as in the sibling multipixel
// tests; it was previously pinned to 32, which left the loop variable unused
// and re-ran one configuration six times. The largest swept count (156) stays
// below the output stride (163).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22473
// Channel counts 1, 32, ..., 156 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22486
// Channel counts 1, 32, ..., 156 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22499
// Channel counts 64, 160, ..., 448 (multiples of 32) with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22511
// For every zero_index in [0, 9), sweeps channel counts 64, 160, ..., 448
// with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(9).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22526 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22527
22528
22529 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(32);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
              xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22538
// Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22549
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22561
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22573
// Every channel count from 1 through 31, i.e. below cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 1; c < 32; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22584
// Every channel count from 33 through 63, i.e. strictly between cr (32) and 64.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22595
// Channel counts 33 through 63 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22607
// Channel counts 33 through 63 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 33; c < 64; c++) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22619
// Channel counts 1, 32, 63, 94, 125, 156 with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22631
// For channel counts 1, 32, ..., 156, tries every step from 2 through 25
// with width(3).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; s++) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22646
// Multipixel run with width(5) and output_stride(163) over channel counts
// 1, 32, ..., 156.
// The channel count follows the loop variable, as in the sibling multipixel
// tests; it was previously pinned to 32, which left the loop variable unused
// and re-ran one configuration six times. The largest swept count (156) stays
// below the output stride (163).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22659
// Channel counts 1, 32, ..., 156 with width(3) and qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22672
// Channel counts 1, 32, ..., 156 with width(3) and qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22685
// Channel counts 64, 160, ..., 448 (multiples of 32) with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22697
// For every zero_index in [0, 25), sweeps channel counts 64, 160, ..., 448
// with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_ADD16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx);
      tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_add16_vpunpck,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
22712 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22713
22714
22715 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels equal to cr (32) and kr(25).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(32);
  tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
              xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
22724
// Channel counts 64, 160, 256, 352, 448: each a multiple of cr (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22735
// Channel counts 64, 160, ..., 448 (multiples of 32) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    auto tester = DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
                xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22747
// Same channel sweep as c_div_32 (64, 160, ..., 448) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22759
// Covers every channel count below 32 (1 through 31).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22770
// Covers every channel count strictly between 32 and 64 (33 through 63).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22781
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22793
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22805
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22817
// Width-3 runs over channel counts 1, 32, ..., 156, each repeated for every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22832
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94, 125
// and 156 (the stride exceeds every swept channel count).
// The swept value is now forwarded to channels(); previously channels(32) was
// hard-coded, so the loop re-ran one identical configuration six times.
// NOTE(review): this file is generated -- mirror the change in the generator
// template, otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
22845
// Width-3 runs over channel counts 1, 32, ..., 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22858
// Width-3 runs over channel counts 1, 32, ..., 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22871
// Channel counts 64, 160, 256, 352 and 448 with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22883
// Sweeps zero_index over 0..24 and, for each value, channel counts 64, 160,
// 256, 352 and 448, always with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPMOVSX, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_row = 0; zero_row < 25; ++zero_row) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_row).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22898 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22899
22900
22901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
22910
// Sweeps channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22921
// Same channel sweep as c_div_32 (64, 160, ..., 448) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22933
// Same channel sweep as c_div_32 (64, 160, ..., 448) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22945
// Covers every channel count below 32 (1 through 31).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22956
// Covers every channel count strictly between 32 and 64 (33 through 63).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22967
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22979
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
22991
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23003
// Width-3 runs over channel counts 1, 32, ..., 156, each repeated for every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23018
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94, 125
// and 156 (the stride exceeds every swept channel count).
// The swept value is now forwarded to channels(); previously channels(32) was
// hard-coded, so the loop re-ran one identical configuration six times.
// NOTE(review): this file is generated -- mirror the change in the generator
// template, otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
23031
// Width-3 runs over channel counts 1, 32, ..., 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23044
// Width-3 runs over channel counts 1, 32, ..., 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23057
// Channel counts 64, 160, 256, 352 and 448 with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23069
// Sweeps zero_index over 0..24 and, for each value, channel counts 64, 160,
// 256, 352 and 448, always with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16_VPUNPCK, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_row = 0; zero_row < 25; ++zero_row) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_row).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpunpck,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23085
23086
23087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
23096
// Sweeps channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23107
// Same channel sweep as c_div_32 (64, 160, ..., 448) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23119
// Same channel sweep as c_div_32 (64, 160, ..., 448) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23131
// Covers every channel count below 32 (1 through 31).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23142
// Covers every channel count strictly between 32 and 64 (33 through 63).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23153
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23165
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23177
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23189
// Width-3 runs over channel counts 1, 32, ..., 156, each repeated for every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23204
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94, 125
// and 156 (the stride exceeds every swept channel count).
// The swept value is now forwarded to channels(); previously channels(32) was
// hard-coded, so the loop re-ran one identical configuration six times.
// NOTE(review): this file is generated -- mirror the change in the generator
// template, otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
23217
// Width-3 runs over channel counts 1, 32, ..., 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23230
// Width-3 runs over channel counts 1, 32, ..., 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23243
// Channel counts 64, 160, 256, 352 and 448 with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
23255
// Sweeps zero_index over 0..24 and, for each value, channel counts 64, 160,
// 256, 352 and 448, always with input_offset(592).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_row = 0; zero_row < 25; ++zero_row) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels).input_offset(592).zero_index(zero_row).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23270 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23271
23272
23273 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
23282
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23293
// Same channel sweep as c_div_16 (32, 80, ..., 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23305
// Same channel sweep as c_div_16 (32, 80, ..., 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23317
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23328
// Covers every channel count strictly between 16 and 32 (17 through 31).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23339
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23351
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23363
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23375
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(pixel_step).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23390
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76 (the stride exceeds every swept channel count).
// The swept value is now forwarded to channels(); previously channels(16) was
// hard-coded, so the loop re-ran one identical configuration six times.
// NOTE(review): this file is generated -- mirror the change in the generator
// template, otherwise regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23403
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23416
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23429
// Channel counts 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23441
// Sweeps zero_index over 0..8 and, for each value, channel counts 32, 80,
// 128, 176 and 224, always with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zero_row = 0; zero_row < 9; ++zero_row) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_row).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23456 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23457
23458
23459 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to the cr value (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
23468
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23479
// Same channel sweep as c_div_16 (32, 80, ..., 224) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23491
// Same channel sweep as c_div_16 (32, 80, ..., 224) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23503
// Covers every channel count below 16 (1 through 15).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(num_channels).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
23514
// Covers channel counts strictly between 16 and 32, i.e. 17 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23525
// Channel counts 17 through 31 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23537
// Channel counts 17 through 31 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23549
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23561
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76, each combined
// with every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23576
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61
// and 76; 83 is larger than every channel count in the sweep.
//
// The loop variable is forwarded to channels(). Previously the body
// hard-coded channels(16), so the loop variable was unused and all six
// iterations exercised the same configuration.
// NOTE(review): this file is marked auto-generated; the same fix presumably
// belongs in the generator template as well — confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23589
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23602
// Width-3 runs over channel counts 1, 16, 31, 46, 61 and 76 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23615
// Channel counts 32, 80, 128, 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23627
// For each zero_index from 0 through 24, runs channel counts 32, 80, 128,
// 176 and 224 with input_offset set to 304.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23642 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23643
23644
23645 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up32x3 AVX512SKX mul32 kernel with channels == 32,
// the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(3).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
23654
// Sweeps channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23665
// Channel counts 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23677
// Channel counts 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23689
// Covers every channel count below 32, i.e. 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23700
// Covers channel counts strictly between 32 and 64, i.e. 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23711
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23723
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23735
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23747
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156, each combined
// with step values 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(3).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23762
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94,
// 125 and 156; 163 is larger than every channel count in the sweep.
//
// The loop variable is forwarded to channels(). Previously the body
// hard-coded channels(32), so the loop variable was unused and all six
// iterations exercised the same configuration.
// NOTE(review): this file is marked auto-generated; the same fix presumably
// belongs in the generator template as well — confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23775
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23788
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23801
// Channel counts 64, 160, 256, 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(3).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23813
// For each zero_index from 0 through 2, runs channel counts 64, 160, 256,
// 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X3__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(3).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23828 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23829
23830
23831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up32x9 AVX512SKX mul32 kernel with channels == 32,
// the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
23840
// Sweeps channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23851
// Channel counts 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23863
// Channel counts 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23875
// Covers every channel count below 32, i.e. 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23886
// Covers channel counts strictly between 32 and 64, i.e. 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23897
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23909
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23921
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23933
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23948
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94,
// 125 and 156; 163 is larger than every channel count in the sweep.
//
// The loop variable is forwarded to channels(). Previously the body
// hard-coded channels(32), so the loop variable was unused and all six
// iterations exercised the same configuration.
// NOTE(review): this file is marked auto-generated; the same fix presumably
// belongs in the generator template as well — confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
23961
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23974
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23987
// Channel counts 64, 160, 256, 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
23999
// For each zero_index from 0 through 8, runs channel counts 64, 160, 256,
// 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24014 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24015
24016
24017 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up32x25 AVX512SKX mul32 kernel with channels == 32,
// the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(25).channels(32)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
24026
// Sweeps channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24037
// Channel counts 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24049
// Channel counts 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24061
// Covers every channel count below 32, i.e. 1 through 31.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24072
// Covers channel counts strictly between 32 and 64, i.e. 33 through 63.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24083
// Channel counts 33 through 63 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24095
// Channel counts 33 through 63 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24107
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24119
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156, each combined
// with every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24134
// Width-5 runs with output_stride(163) over channel counts 1, 32, 63, 94,
// 125 and 156; 163 is larger than every channel count in the sweep.
//
// The loop variable is forwarded to channels(). Previously the body
// hard-coded channels(32), so the loop variable was unused and all six
// iterations exercised the same configuration.
// NOTE(review): this file is marked auto-generated; the same fix presumably
// belongs in the generator template as well — confirm before regenerating.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
24147
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24160
// Width-3 runs over channel counts 1, 32, 63, 94, 125 and 156 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24173
// Channel counts 64, 160, 256, 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).input_offset(592)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
24185
// For each zero_index from 0 through 24, runs channel counts 64, 160, 256,
// 352 and 448 with input_offset set to 592.
TEST(QC8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zi)
          .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24200 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24201
24202
24203 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up8x9 WAsm SIMD mul16 kernel with channels == 8,
// the same value that is passed to cr().
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
  DWConvMicrokernelTester tester{};
  tester.cr(8).kr(9).channels(8)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
24211
// Sweeps channel counts 16, 40, 64, 88 and 112 (all multiples of 8).
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
24221
// Channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmin(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
24232
// Channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c).qmax(128)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
24243
// Covers every channel count below 8, i.e. 1 through 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
24253
// Covers channel counts strictly between 8 and 16, i.e. 9 through 15.
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(8).kr(9).channels(c)
        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
24263
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15 with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24274
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15 with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24285
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
  // width(3) with channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24296
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
  // For each channel count (1, 8, 15, 22, 29, 36) sweep step over 2..9 at width(3).
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24310
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  // width(5) with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so each iteration covers a different
  // channel count; a fixed channels(8) would repeat one configuration six times.
  // Every swept channel count stays below the output stride of 43.
  // NOTE: this file is auto-generated; mirror this change in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24322
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  // width(3), qmin set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24334
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  // width(3), qmax set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24346
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24357
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
  // Sweeps zero_index over 0..8; for each value runs channel counts
  // 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24371 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24372
24373
24374 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_eq_8) {
  // Single configuration: channel count equal to cr (8) with kr = 9.
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
24382
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8).
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24392
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24403
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24414
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_lt_8) {
  // Every channel count below cr = 8, i.e. 1 through 7.
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24424
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8) {
  // Channel counts 9 through 15: above cr = 8 but below 2 * cr.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24434
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15 with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24445
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15 with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24456
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel) {
  // width(3) with channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24467
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // For each channel count (1, 8, 15, 22, 29, 36) sweep step over 2..9 at width(3).
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24481
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // width(5) with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so each iteration covers a different
  // channel count; a fixed channels(8) would repeat one configuration six times.
  // Every swept channel count stays below the output stride of 43.
  // NOTE: this file is auto-generated; mirror this change in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24493
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // width(3), qmin set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24505
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // width(3), qmax set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24517
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24528
TEST(QC8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16_ADD16, zero) {
  // Sweeps zero_index over 0..8; for each value runs channel counts
  // 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24542 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24543
24544
24545 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
  // Single configuration: channel count equal to cr (8) with kr = 25.
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
24553
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8).
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24563
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24574
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24585
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
  // Every channel count below cr = 8, i.e. 1 through 7.
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24595
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
  // Channel counts 9 through 15: above cr = 8 but below 2 * cr.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24605
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15 with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24616
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15 with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24627
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
  // width(3) with channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24638
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
  // For each channel count (1, 8, 15, 22, 29, 36) sweep step over 2..25 at width(3).
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24652
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  // width(5) with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so each iteration covers a different
  // channel count; a fixed channels(8) would repeat one configuration six times.
  // Every swept channel count stays below the output stride of 43.
  // NOTE: this file is auto-generated; mirror this change in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24664
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  // width(3), qmin set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24676
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  // width(3), qmax set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24688
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24699
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
  // Sweeps zero_index over 0..24; for each value runs channel counts
  // 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24713 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24714
24715
24716 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_eq_8) {
  // Single configuration: channel count equal to cr (8) with kr = 25.
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
24724
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8).
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24734
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmin) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24745
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_div_8_with_qmax) {
  // Channel counts 16, 40, 64, 88, 112 (each a multiple of cr = 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24756
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_lt_8) {
  // Every channel count below cr = 8, i.e. 1 through 7.
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24766
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8) {
  // Channel counts 9 through 15: above cr = 8 but below 2 * cr.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24776
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmin) {
  // Channel counts 9 through 15 with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24787
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, c_gt_8_with_qmax) {
  // Channel counts 9 through 15 with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24798
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel) {
  // width(3) with channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24809
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // For each channel count (1, 8, 15, 22, 29, 36) sweep step over 2..25 at width(3).
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24823
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // width(5) with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
  // channels() receives the loop variable so each iteration covers a different
  // channel count; a fixed channels(8) would repeat one configuration six times.
  // Every swept channel count stays below the output stride of 43.
  // NOTE: this file is auto-generated; mirror this change in the generator template.
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
24835
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  // width(3), qmin set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24847
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  // width(3), qmax set to 128, channel counts 1, 8, 15, 22, 29, 36.
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24859
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, input_offset) {
  // input_offset(176) with channel counts 16, 40, 64, 88, 112.
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24870
TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16_ADD16, zero) {
  // Sweeps zero_index over 0..24; for each value runs channel counts
  // 16, 40, 64, 88, 112 with input_offset(176).
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24884 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24885
24886
24887 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_eq_16) {
  // Single configuration: channel count equal to cr (16) with kr = 3.
  DWConvMicrokernelTester().cr(16).kr(3).channels(16).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
24895
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16) {
  // Channel counts 32, 80, 128, 176, 224 (each a multiple of cr = 16).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24905
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  // Channel counts 32, 80, 128, 176, 224 (each a multiple of cr = 16) with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24916
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  // Channel counts 32, 80, 128, 176, 224 (each a multiple of cr = 16) with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24927
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_lt_16) {
  // Every channel count below cr = 16, i.e. 1 through 15.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24937
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16) {
  // Channel counts 17 through 31: above cr = 16 but below 2 * cr.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24947
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  // Channel counts 17 through 31 with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24958
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  // Channel counts 17 through 31 with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24969
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel) {
  // width(3) with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
24980
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  // For each channel count (1, 16, 31, 46, 61, 76) run step values 2 and 3 at width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t step_size = 2; step_size <= 3; ++step_size) {
      DWConvMicrokernelTester().cr(16).kr(3).channels(c).width(3).step(step_size).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24994
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  // width(5) with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
  // channels() receives the loop variable so each iteration covers a different
  // channel count; a fixed channels(16) would repeat one configuration six times.
  // Every swept channel count stays below the output stride of 83.
  // NOTE: this file is auto-generated; mirror this change in the generator template.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25006
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25018
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25030
// Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(3).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25041
// For every zero_index in [0, 3), runs channel counts 32, 80, ..., 224 with
// input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X3__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi <= 2; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(3).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25055 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25056
25057
25058 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
25066
// Channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25076
// Channel counts 32, 80, 128, 176, 224, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25087
// Channel counts 32, 80, 128, 176, 224, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25098
// Channel counts 1 through 15 (all below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25108
// Channel counts 17 through 31 (just above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25118
// Channel counts 17 through 31, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25129
// Channel counts 17 through 31, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25140
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25151
// Width-3 runs over channel counts 1, 16, ..., 76 combined with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25165
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76. channels() receives the loop variable so that every
// iteration exercises a different channel count (it was hard-coded to 16,
// which made all six iterations identical). Every swept count is below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25177
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25189
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25201
// Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25212
// For every zero_index in [0, 9), runs channel counts 32, 80, ..., 224 with
// input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25226 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25227
25228
25229 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
25237
// Channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25247
// Channel counts 32, 80, 128, 176, 224, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25258
// Channel counts 32, 80, 128, 176, 224, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25269
// Channel counts 1 through 15 (all below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25279
// Channel counts 17 through 31 (just above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25289
// Channel counts 17 through 31, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25300
// Channel counts 17 through 31, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25311
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25322
// Width-3 runs over channel counts 1, 16, ..., 76 combined with step values 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25336
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76. channels() receives the loop variable so that every
// iteration exercises a different channel count (it was hard-coded to 16,
// which made all six iterations identical). Every swept count is below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25348
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25360
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25372
// Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25383
// For every zero_index in [0, 9), runs channel counts 32, 80, ..., 224 with
// input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25397 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25398
25399
25400 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
25408
// Channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25418
// Channel counts 32, 80, 128, 176, 224, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25429
// Channel counts 32, 80, 128, 176, 224, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25440
// Channel counts 1 through 15 (all below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25450
// Channel counts 17 through 31 (just above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25460
// Channel counts 17 through 31, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25471
// Channel counts 17 through 31, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25482
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25493
// Width-3 runs over channel counts 1, 16, ..., 76 combined with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25507
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76. channels() receives the loop variable so that every
// iteration exercises a different channel count (it was hard-coded to 16,
// which made all six iterations identical). Every swept count is below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25519
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25531
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25543
// Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25554
// For every zero_index in [0, 25), runs channel counts 32, 80, ..., 224 with
// input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi <= 24; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25568 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25569
25570
25571 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with channels equal to cr (16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_eq_16) {
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
25579
// Channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25589
// Channel counts 32, 80, 128, 176, 224, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25600
// Channel counts 32, 80, 128, 176, 224, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25611
// Channel counts 1 through 15 (all below cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_lt_16) {
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25621
// Channel counts 17 through 31 (just above cr = 16).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25631
// Channel counts 17 through 31, each run with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25642
// Channel counts 17 through 31, each run with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25653
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25664
// Width-3 runs over channel counts 1, 16, ..., 76 combined with step values 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  for (size_t c = 1; c <= 76; c += 15) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25678
// Width-5 runs with output_stride(83), swept over channel counts
// 1, 16, 31, 46, 61, 76. channels() receives the loop variable so that every
// iteration exercises a different channel count (it was hard-coded to 16,
// which made all six iterations identical). Every swept count is below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// template needs the same change or regeneration will revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25690
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25702
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 76; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25714
// Channel counts 32, 80, 128, 176, 224, each run with input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, input_offset) {
  for (uint32_t c = 32; c <= 224; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
25725
// For every zero_index in [0, 25), runs channel counts 32, 80, ..., 224 with
// input_offset(304).
TEST(QC8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16_ADD16, zero) {
  for (uint32_t zi = 0; zi <= 24; ++zi) {
    for (uint32_t c = 32; c <= 224; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
25739 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25740
25741
25742 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: channel count equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24)
      .Test(ukernel, init_params, requantize);
}
25750
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25760
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
25771
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
25782
// Every channel count from 1 through 23, i.e. all values below cr = 24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25792
// Every channel count from 25 through 47, i.e. strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25802
// Every channel count from 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
25813
// Every channel count from 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
25824
// Multi-pixel run (width 3) swept over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3)
        .Test(ukernel, init_params, requantize);
  }
}
25835
// Multi-pixel run (width 3): for channel counts 1, 24, 47, 70, 93, 116, tries
// every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t kernel_step = 2; kernel_step <= 9; ++kernel_step) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).step(kernel_step)
          .Test(ukernel, init_params, requantize);
    }
  }
}
25849
// Multi-pixel run (width 5) with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
//
// The channel count follows the loop variable so each iteration exercises a
// different configuration; a fixed .channels(24) would leave the loop variable
// unused and make all six iterations identical. Every swept value stays below
// the output stride of 127.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template
// so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
25861
// Multi-pixel run (width 3) with qmin(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
25873
// Multi-pixel run (width 3) with qmax(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
25885
// Runs with input_offset(464) over channel counts 48, 120, 192, 264, 336
// (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464)
        .Test(ukernel, init_params, requantize);
  }
}
25896
// Sweeps zero_index over 0..8 (every index below kr = 9); for each index runs
// channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_at)
          .Test(ukernel, init_params, requantize);
    }
  }
}
25910 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25911
25912
25913 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: channel count equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_eq_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24)
      .Test(ukernel, init_params, requantize);
}
25921
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25931
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
25942
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
25953
// Every channel count from 1 through 23, i.e. all values below cr = 24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_lt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25963
// Every channel count from 25 through 47, i.e. strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
25973
// Every channel count from 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
25984
// Every channel count from 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
25995
// Multi-pixel run (width 3) swept over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3)
        .Test(ukernel, init_params, requantize);
  }
}
26006
// Multi-pixel run (width 3): for channel counts 1, 24, 47, 70, 93, 116, tries
// every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t kernel_step = 2; kernel_step <= 9; ++kernel_step) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).step(kernel_step)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26020
// Multi-pixel run (width 5) with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
//
// The channel count follows the loop variable so each iteration exercises a
// different configuration; a fixed .channels(24) would leave the loop variable
// unused and make all six iterations identical. Every swept value stays below
// the output stride of 127.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template
// so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26032
// Multi-pixel run (width 3) with qmin(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26044
// Multi-pixel run (width 3) with qmax(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).width(3).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26056
// Runs with input_offset(464) over channel counts 48, 120, 192, 264, 336
// (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, input_offset) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464)
        .Test(ukernel, init_params, requantize);
  }
}
26067
// Sweeps zero_index over 0..8 (every index below kr = 9); for each index runs
// channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16_ADD16, zero) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(num_channels).input_offset(464).zero_index(zero_at)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26081 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26082
26083
26084 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: channel count equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
      .Test(ukernel, init_params, requantize);
}
26092
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26102
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26113
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26124
// Every channel count from 1 through 23, i.e. all values below cr = 24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26134
// Every channel count from 25 through 47, i.e. strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26144
// Every channel count from 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26155
// Every channel count from 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26166
// Multi-pixel run (width 3) swept over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3)
        .Test(ukernel, init_params, requantize);
  }
}
26177
// Multi-pixel run (width 3): for channel counts 1, 24, 47, 70, 93, 116, tries
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t kernel_step = 2; kernel_step <= 25; ++kernel_step) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).step(kernel_step)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26191
// Multi-pixel run (width 5) with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
//
// The channel count follows the loop variable so each iteration exercises a
// different configuration; a fixed .channels(24) would leave the loop variable
// unused and make all six iterations identical. Every swept value stays below
// the output stride of 127.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template
// so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26203
// Multi-pixel run (width 3) with qmin(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26215
// Multi-pixel run (width 3) with qmax(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26227
// Runs with input_offset(464) over channel counts 48, 120, 192, 264, 336
// (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464)
        .Test(ukernel, init_params, requantize);
  }
}
26238
// Sweeps zero_index over 0..24 (every index below kr = 25); for each index runs
// channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t zero_at = 0; zero_at < 25; ++zero_at) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_at)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26252 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26253
26254
26255 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: channel count equal to cr (24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_eq_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
      .Test(ukernel, init_params, requantize);
}
26263
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26273
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26284
// Channel counts 48, 120, 192, 264, 336 (all multiples of cr = 24) with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_div_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26295
// Every channel count from 1 through 23, i.e. all values below cr = 24.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_lt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 1; num_channels < 24; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26305
// Every channel count from 25 through 47, i.e. strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26315
// Every channel count from 25 through 47 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26326
// Every channel count from 25 through 47 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, c_gt_24_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26337
// Multi-pixel run (width 3) swept over channel counts 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3)
        .Test(ukernel, init_params, requantize);
  }
}
26348
// Multi-pixel run (width 3): for channel counts 1, 24, 47, 70, 93, 116, tries
// every step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_step) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t kernel_step = 2; kernel_step <= 25; ++kernel_step) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).step(kernel_step)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26362
// Multi-pixel run (width 5) with output_stride(127), swept over channel counts
// 1, 24, 47, 70, 93, 116.
//
// The channel count follows the loop variable so each iteration exercises a
// different configuration; a fixed .channels(24) would leave the loop variable
// unused and make all six iterations identical. Every swept value stays below
// the output stride of 127.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator template
// so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
26374
// Multi-pixel run (width 3) with qmin(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26386
// Multi-pixel run (width 3) with qmax(128), swept over channel counts
// 1, 24, 47, 70, 93, 116.
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, multipixel_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).width(3).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26398
// Runs with input_offset(464) over channel counts 48, 120, 192, 264, 336
// (all multiples of cr = 24).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, input_offset) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464)
        .Test(ukernel, init_params, requantize);
  }
}
26409
// Sweeps zero_index over 0..24 (every index below kr = 25); for each index runs
// channel counts 48, 120, 192, 264, 336 with input_offset(464).
TEST(QC8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16_ADD16, zero) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t zero_at = 0; zero_at < 25; ++zero_at) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(num_channels).input_offset(464).zero_index(zero_at)
          .Test(ukernel, init_params, requantize);
    }
  }
}
26423 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26424
26425
26426 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: channel count equal to cr (1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
      .Test(ukernel, init_params, requantize);
}
26434
// Every channel count from 2 through 9 (all above cr = 1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(num_channels)
        .Test(ukernel, init_params, requantize);
  }
}
26444
// Every channel count from 2 through 9 with qmin(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(num_channels).qmin(128)
        .Test(ukernel, init_params, requantize);
  }
}
26455
// Every channel count from 2 through 9 with qmax(128).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
  const auto ukernel = xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic;
  const auto init_params = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
  const auto requantize = xnn_qs8_requantize_fp32;
  for (uint32_t num_channels = 2; num_channels < 10; ++num_channels) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(num_channels).qmax(128)
        .Test(ukernel, init_params, requantize);
  }
}
26466
// Runs channel counts 1 through 5 with cr=1, kr=9 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26477
// For channel counts 1 through 5, runs every step value 2 through 9
// with cr=1, kr=9 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26491
// Runs channel counts 1 through 5 with cr=1, kr=9, width=5 and output_stride=7.
//
// The loop variable is now passed to channels(); previously the call was
// hard-coded to channels(1), so all five iterations repeated one configuration.
// The output stride (7) remains larger than the largest channel count (5).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26503
// Runs channel counts 1 through 5 with cr=1, kr=9, width=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26515
// Runs channel counts 1 through 5 with cr=1, kr=9, width=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26527
// Runs channel counts 2, 5, 8, 11 and 14 with cr=1, kr=9 and input_offset=48.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26538
// For every zero_index in 0..8, runs channel counts 2, 5, 8, 11 and 14
// with cr=1, kr=9 and input_offset=48.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26552 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26553
26554
26555 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with cr=1, kr=25 and exactly one channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26563
// Runs channel counts 2 through 9 with cr=1, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26573
// Runs channel counts 2 through 9 with cr=1, kr=25 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26584
// Runs channel counts 2 through 9 with cr=1, kr=25 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26595
// Runs channel counts 1 through 5 with cr=1, kr=25 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26606
// For channel counts 1 through 5, runs every step value 2 through 25
// with cr=1, kr=25 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26620
// Runs channel counts 1 through 5 with cr=1, kr=25, width=5 and output_stride=7.
//
// The loop variable is now passed to channels(); previously the call was
// hard-coded to channels(1), so all five iterations repeated one configuration.
// The output stride (7) remains larger than the largest channel count (5).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26632
// Runs channel counts 1 through 5 with cr=1, kr=25, width=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26644
// Runs channel counts 1 through 5 with cr=1, kr=25, width=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26656
// Runs channel counts 2, 5, 8, 11 and 14 with cr=1, kr=25 and input_offset=48.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26667
// For every zero_index in 0..24, runs channel counts 2, 5, 8, 11 and 14
// with cr=1, kr=25 and input_offset=48.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26681 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26682
26683
26684 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with cr=2, kr=3 and exactly two channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(3).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26692
// Runs channel counts 4, 10, 16, 22 and 28 (all multiples of 2) with cr=2, kr=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26702
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26713
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26724
// Runs with cr=2, kr=3; the loop bounds yield only a channel count of 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26734
// Runs with cr=2, kr=3; the loop bounds yield only a channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26744
// Runs with cr=2, kr=3 and qmin set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26755
// Runs with cr=2, kr=3 and qmax set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26766
// Runs channel counts 1 through 10 with cr=2, kr=3 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26777
// For channel counts 1 through 10, runs step values 2 and 3
// with cr=2, kr=3 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 4; ++s) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26791
// Runs channel counts 1 through 10 with cr=2, kr=3, width=5 and output_stride=13.
//
// The loop variable is now passed to channels(); previously the call was
// hard-coded to channels(2), so all ten iterations repeated one configuration.
// The output stride (13) remains larger than the largest channel count (10).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26803
// Runs channel counts 1 through 10 with cr=2, kr=3, width=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26815
// Runs channel counts 1 through 10 with cr=2, kr=3, width=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26827
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=3 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26838
// For every zero_index in 0..2, runs channel counts 4, 10, 16, 22 and 28
// with cr=2, kr=3 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__WASM_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 3; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26852 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26853
26854
26855 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with cr=2, kr=9 and exactly two channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
26863
// Runs channel counts 4, 10, 16, 22 and 28 (all multiples of 2) with cr=2, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26873
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26884
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=9 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26895
// Runs with cr=2, kr=9; the loop bounds yield only a channel count of 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26905
// Runs with cr=2, kr=9; the loop bounds yield only a channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26915
// Runs with cr=2, kr=9 and qmin set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26926
// Runs with cr=2, kr=9 and qmax set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26937
// Runs channel counts 1 through 10 with cr=2, kr=9 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26948
// For channel counts 1 through 10, runs every step value 2 through 9
// with cr=2, kr=9 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26962
// Runs channel counts 1 through 10 with cr=2, kr=9, width=5 and output_stride=13.
//
// The loop variable is now passed to channels(); previously the call was
// hard-coded to channels(2), so all ten iterations repeated one configuration.
// The output stride (13) remains larger than the largest channel count (10).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
26974
// Runs channel counts 1 through 10 with cr=2, kr=9, width=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26986
// Runs channel counts 1 through 10 with cr=2, kr=9, width=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
26998
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=9 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27009
// For every zero_index in 0..8, runs channel counts 4, 10, 16, 22 and 28
// with cr=2, kr=9 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27023 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27024
27025
27026 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with cr=2, kr=25 and exactly two channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
27034
// Runs channel counts 4, 10, 16, 22 and 28 (all multiples of 2) with cr=2, kr=25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27044
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=25 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27055
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=25 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27066
// Runs with cr=2, kr=25; the loop bounds yield only a channel count of 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27076
// Runs with cr=2, kr=25; the loop bounds yield only a channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27086
// Runs with cr=2, kr=25 and qmin set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27097
// Runs with cr=2, kr=25 and qmax set to 128; the loop bounds yield only a
// channel count of 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27108
// Runs channel counts 1 through 10 with cr=2, kr=25 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27119
// For channel counts 1 through 10, runs every step value 2 through 25
// with cr=2, kr=25 and width=3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).step(s).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27133
// Runs channel counts 1 through 10 with cr=2, kr=25, width=5 and output_stride=13.
//
// The loop variable is now passed to channels(); previously the call was
// hard-coded to channels(2), so all ten iterations repeated one configuration.
// The output stride (13) remains larger than the largest channel count (10).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27145
// Runs channel counts 1 through 10 with cr=2, kr=25, width=3 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27157
// Runs channel counts 1 through 10 with cr=2, kr=25, width=3 and qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 11; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27169
// Runs channel counts 4, 10, 16, 22 and 28 with cr=2, kr=25 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27180
// For every zero_index in 0..24, runs channel counts 4, 10, 16, 22 and 28
// with cr=2, kr=25 and input_offset=80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).zero_index(zi).Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27194 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27195
27196
27197 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with cr=4, kr=9 and exactly four channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester().cr(4).kr(9).channels(4).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
27205
// Runs channel counts 8, 20, 32, 44 and 56 (all multiples of 4) with cr=4, kr=9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27215
// Runs channel counts 8, 20, 32, 44 and 56 with cr=4, kr=9 and qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27226
// c_div_4_with_qmax: cr=4, kr=9, qmax=128; channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27237
// c_lt_4: cr=4, kr=9; channel counts 1, 2, 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
  for (uint32_t n_ch = 1; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27247
// c_gt_4: cr=4, kr=9; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27257
// c_gt_4_with_qmin: cr=4, kr=9, qmin=128; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27268
// c_gt_4_with_qmax: cr=4, kr=9, qmax=128; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27279
// multipixel: cr=4, kr=9, width=3; channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27290
// multipixel_with_step: cr=4, kr=9, width=3; channel counts 1, 4, ..., 19,
// each combined with every step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    for (size_t pix_step = 2; pix_step <= 9; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27304
// multipixel_with_output_stride: cr=4, kr=9, width=5, output stride 23;
// channel counts 1, 4, 7, 10, 13, 16, 19.
//
// The loop variable is now passed to channels(). Previously the body used a
// hard-coded channels(4), so the sweep ran the same configuration seven times.
// Every swept channel count (max 19) stays below the output stride of 23.
// NOTE(review): this file is generated; mirror the change in the generator
// template so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27316
// multipixel_with_qmin: cr=4, kr=9, width=3, qmin=128; channel counts 1, 4, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27328
// multipixel_with_qmax: cr=4, kr=9, width=3, qmax=128; channel counts 1, 4, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27340
// input_offset: cr=4, kr=9, input offset 112; channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(n_ch).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27351
// zero: cr=4, kr=9, input offset 112; every zero index 0..8 is combined with
// channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(n_ch).input_offset(112).zero_index(zero_at);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27365 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27366
27367
27368 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// c_eq_4: single run with cr=4, kr=25 and exactly 4 channels.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(25).channels(4);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
27376
// c_div_4: cr=4, kr=25; channel counts 8, 20, 32, 44, 56 (all multiples of 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27386
// c_div_4_with_qmin: cr=4, kr=25, qmin=128; channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27397
// c_div_4_with_qmax: cr=4, kr=25, qmax=128; channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27408
// c_lt_4: cr=4, kr=25; channel counts 1, 2, 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
  for (uint32_t n_ch = 1; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27418
// c_gt_4: cr=4, kr=25; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27428
// c_gt_4_with_qmin: cr=4, kr=25, qmin=128; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27439
// c_gt_4_with_qmax: cr=4, kr=25, qmax=128; channel counts 5, 6, 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t n_ch = 5; n_ch < 8; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27450
// multipixel: cr=4, kr=25, width=3; channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27461
// multipixel_with_step: cr=4, kr=25, width=3; channel counts 1, 4, ..., 19,
// each combined with every step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    for (size_t pix_step = 2; pix_step <= 25; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27475
// multipixel_with_output_stride: cr=4, kr=25, width=5, output stride 23;
// channel counts 1, 4, 7, 10, 13, 16, 19.
//
// The loop variable is now passed to channels(). Previously the body used a
// hard-coded channels(4), so the sweep ran the same configuration seven times.
// Every swept channel count (max 19) stays below the output stride of 23.
// NOTE(review): this file is generated; mirror the change in the generator
// template so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27487
// multipixel_with_qmin: cr=4, kr=25, width=3, qmin=128; channel counts 1, 4, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27499
// multipixel_with_qmax: cr=4, kr=25, width=3, qmax=128; channel counts 1, 4, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 20; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27511
// input_offset: cr=4, kr=25, input offset 112; channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
  for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(n_ch).input_offset(112);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27522
// zero: cr=4, kr=25, input offset 112; every zero index 0..24 is combined with
// channel counts 8, 20, 32, 44, 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_at = 0; zero_at < 25; ++zero_at) {
    for (uint32_t n_ch = 8; n_ch < 64; n_ch += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(n_ch).input_offset(112).zero_index(zero_at);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27536 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27537
27538
// c_eq_1: single run with cr=1, kr=9 and exactly 1 channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester tester;
  tester.cr(1).kr(9).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
27546
// c_gt_1: cr=1, kr=9; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27556
// c_gt_1_with_qmin: cr=1, kr=9, qmin=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27567
// c_gt_1_with_qmax: cr=1, kr=9, qmax=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27578
// multipixel: cr=1, kr=9, width=3; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27589
// multipixel_with_step: cr=1, kr=9, width=3; channel counts 1 through 5,
// each combined with every step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    for (size_t pix_step = 2; pix_step <= 9; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27603
// multipixel_with_output_stride: cr=1, kr=9, width=5, output stride 7;
// channel counts 1 through 5.
//
// The loop variable is now passed to channels(). Previously the body used a
// hard-coded channels(1), so the sweep ran the same configuration five times.
// Every swept channel count (max 5) stays below the output stride of 7.
// NOTE(review): this file is generated; mirror the change in the generator
// template so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27615
// multipixel_with_qmin: cr=1, kr=9, width=3, qmin=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27627
// multipixel_with_qmax: cr=1, kr=9, width=3, qmax=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27639
// input_offset: cr=1, kr=9, input offset 48; channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27650
// zero: cr=1, kr=9, input offset 48; every zero index 0..8 is combined with
// channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).input_offset(48).zero_index(zero_at);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27664
// c_eq_1: single run with cr=1, kr=9 and exactly 1 channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
  DWConvMicrokernelTester tester;
  tester.cr(1).kr(9).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
27672
// c_gt_1: cr=1, kr=9; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27682
// c_gt_1_with_qmin: cr=1, kr=9, qmin=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27693
// c_gt_1_with_qmax: cr=1, kr=9, qmax=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27704
// multipixel: cr=1, kr=9, width=3; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27715
// multipixel_with_step: cr=1, kr=9, width=3; channel counts 1 through 5,
// each combined with every step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    for (size_t pix_step = 2; pix_step <= 9; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27729
// multipixel_with_output_stride: cr=1, kr=9, width=5, output stride 7;
// channel counts 1 through 5.
//
// The loop variable is now passed to channels(). Previously the body used a
// hard-coded channels(1), so the sweep ran the same configuration five times.
// Every swept channel count (max 5) stays below the output stride of 7.
// NOTE(review): this file is generated; mirror the change in the generator
// template so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
27741
// multipixel_with_qmin: cr=1, kr=9, width=3, qmin=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27753
// multipixel_with_qmax: cr=1, kr=9, width=3, qmax=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27765
// input_offset: cr=1, kr=9, input offset 48; channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27776
// zero: cr=1, kr=9, input offset 48; every zero index 0..8 is combined with
// channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).input_offset(48).zero_index(zero_at);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27790
// c_eq_1: single run with cr=1, kr=9 and exactly 1 channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
  DWConvMicrokernelTester tester;
  tester.cr(1).kr(9).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
27798
// c_gt_1: cr=1, kr=9; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27808
// c_gt_1_with_qmin: cr=1, kr=9, qmin=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27819
// c_gt_1_with_qmax: cr=1, kr=9, qmax=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27830
// multipixel: cr=1, kr=9, width=3; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27841
// multipixel_with_step: cr=1, kr=9, width=3; channel counts 1 through 5,
// each combined with every step value 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    for (size_t pix_step = 2; pix_step <= 9; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27855
// multipixel_with_output_stride: cr=1, kr=9, width=5, output stride 7;
// channel counts 1 through 5.
//
// The loop variable is now passed to channels(). Previously the body used a
// hard-coded channels(1), so the sweep ran the same configuration five times.
// Every swept channel count (max 5) stays below the output stride of 7.
// NOTE(review): this file is generated; mirror the change in the generator
// template so it is not lost on regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
27867
// multipixel_with_qmin: cr=1, kr=9, width=3, qmin=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27879
// multipixel_with_qmax: cr=1, kr=9, width=3, qmax=128; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27891
// input_offset: cr=1, kr=9, input offset 48; channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(9).channels(n_ch).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
27902
// zero: cr=1, kr=9, input offset 48; every zero index 0..8 is combined with
// channel counts 2, 5, 8, 11, 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_at = 0; zero_at < 9; ++zero_at) {
    for (uint32_t n_ch = 2; n_ch < 16; n_ch += 3) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(9).channels(n_ch).input_offset(48).zero_index(zero_at);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27916
// c_eq_1: single run with cr=1, kr=25 and exactly 1 channel.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester tester;
  tester.cr(1).kr(25).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
27924
// c_gt_1: cr=1, kr=25; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(25).channels(n_ch);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27934
// c_gt_1_with_qmin: cr=1, kr=25, qmin=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(25).channels(n_ch).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27945
// c_gt_1_with_qmax: cr=1, kr=25, qmax=128; channel counts 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t n_ch = 2; n_ch < 10; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(25).channels(n_ch).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27956
// multipixel: cr=1, kr=25, width=3; channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    DWConvMicrokernelTester tester;
    tester.cr(1).kr(25).channels(n_ch).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
27967
// multipixel_with_step: cr=1, kr=25, width=3; channel counts 1 through 5,
// each combined with every step value 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 5; ++n_ch) {
    for (size_t pix_step = 2; pix_step <= 25; ++pix_step) {
      DWConvMicrokernelTester tester;
      tester.cr(1).kr(25).channels(n_ch).width(3).step(pix_step);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27981
// Runs the up1x25 scalar-fmagic kernel with width 5 and output_stride 7 for
// every channel count from 1 through 5. The loop variable is forwarded to
// channels() so each iteration exercises a different channel count; the stride
// of 7 is larger than the largest channel count used here.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change belongs in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
27993
// Runs the up1x25 scalar-fmagic kernel with width 3 and qmin 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28005
// Runs the up1x25 scalar-fmagic kernel with width 3 and qmax 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28017
// Runs the up1x25 scalar-fmagic kernel with input_offset 48 for channel
// counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28028
// Runs the up1x25 scalar-fmagic kernel with input_offset 48 for every
// zero_index 0..24 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot <= 24; ++zero_slot) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      auto tester =
          DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28042
// Single run of the up1x25 scalar-imagic kernel with channels equal to cr (1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
28050
// Runs the up1x25 scalar-imagic kernel for channel counts 2 through 9
// (all above cr = 1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28060
// Runs the up1x25 scalar-imagic kernel for channel counts 2 through 9
// (all above cr = 1) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28071
// Runs the up1x25 scalar-imagic kernel for channel counts 2 through 9
// (all above cr = 1) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28082
// Runs the up1x25 scalar-imagic kernel with width 3 for channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28093
// Runs the up1x25 scalar-imagic kernel with width 3 for every pairing of
// channel count 1..5 and step 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28107
// Runs the up1x25 scalar-imagic kernel with width 5 and output_stride 7 for
// every channel count from 1 through 5. The loop variable is forwarded to
// channels() so each iteration exercises a different channel count; the stride
// of 7 is larger than the largest channel count used here.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change belongs in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28119
// Runs the up1x25 scalar-imagic kernel with width 3 and qmin 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28131
// Runs the up1x25 scalar-imagic kernel with width 3 and qmax 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28143
// Runs the up1x25 scalar-imagic kernel with input_offset 48 for channel
// counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28154
// Runs the up1x25 scalar-imagic kernel with input_offset 48 for every
// zero_index 0..24 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot <= 24; ++zero_slot) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      auto tester =
          DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28168
// Single run of the up1x25 scalar-lrintf kernel with channels equal to cr (1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
  auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(1);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
28176
// Runs the up1x25 scalar-lrintf kernel for channel counts 2 through 9
// (all above cr = 1).
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28186
// Runs the up1x25 scalar-lrintf kernel for channel counts 2 through 9
// (all above cr = 1) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28197
// Runs the up1x25 scalar-lrintf kernel for channel counts 2 through 9
// (all above cr = 1) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c <= 9; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28208
// Runs the up1x25 scalar-lrintf kernel with width 3 for channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28219
// Runs the up1x25 scalar-lrintf kernel with width 3 for every pairing of
// channel count 1..5 and step 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c < 6; ++c) {
    for (size_t s = 2; s < 26; ++s) {
      auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28233
// Runs the up1x25 scalar-lrintf kernel with width 5 and output_stride 7 for
// every channel count from 1 through 5. The loop variable is forwarded to
// channels() so each iteration exercises a different channel count; the stride
// of 7 is larger than the largest channel count used here.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change belongs in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
28245
// Runs the up1x25 scalar-lrintf kernel with width 3 and qmin 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28257
// Runs the up1x25 scalar-lrintf kernel with width 3 and qmax 128 for
// channel counts 1 through 5.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c < 6; ++c) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28269
// Runs the up1x25 scalar-lrintf kernel with input_offset 48 for channel
// counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c <= 14; c += 3) {
    auto tester = DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28280
// Runs the up1x25 scalar-lrintf kernel with input_offset 48 for every
// zero_index 0..24 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_slot = 0; zero_slot <= 24; ++zero_slot) {
    for (uint32_t c = 2; c <= 14; c += 3) {
      auto tester =
          DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28294
// Single run of the up2x3 scalar-imagic kernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_eq_2) {
  auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(2);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
28302
// Runs the up2x3 scalar-imagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28312
// Runs the up2x3 scalar-imagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28323
// Runs the up2x3 scalar-imagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28334
// Runs the up2x3 scalar-imagic kernel for channel counts below cr = 2,
// which is the single value 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t c = 1; c <= 1; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28344
// Runs the up2x3 scalar-imagic kernel for the loop range [3, 4), i.e. the
// single channel count 3 (above cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28354
// Runs the up2x3 scalar-imagic kernel for the single channel count 3
// (above cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28365
// Runs the up2x3 scalar-imagic kernel for the single channel count 3
// (above cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28376
// Runs the up2x3 scalar-imagic kernel with width 3 for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28387
// Runs the up2x3 scalar-imagic kernel with width 3 for every pairing of
// channel count 1..10 and step 2..3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 4; ++s) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28401
// Runs the up2x3 scalar-imagic kernel with width 5 and output_stride 13 for
// every channel count from 1 through 10. The loop variable is forwarded to
// channels() so each iteration exercises a different channel count; the stride
// of 13 is larger than the largest channel count used here.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change belongs in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28413
// Runs the up2x3 scalar-imagic kernel with width 3 and qmin 128 for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28425
// Runs the up2x3 scalar-imagic kernel with width 3 and qmax 128 for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28437
// Runs the up2x3 scalar-imagic kernel with input_offset 80 for channel
// counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28448
// Runs the up2x3 scalar-imagic kernel with input_offset 80 for every
// zero_index 0..2 combined with channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot <= 2; ++zero_slot) {
    for (uint32_t c = 4; c <= 28; c += 6) {
      auto tester =
          DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28462
// Single run of the up2x3 scalar-lrintf kernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_eq_2) {
  auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(2);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
28470
// Runs the up2x3 scalar-lrintf kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28480
// Runs the up2x3 scalar-lrintf kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28491
// Runs the up2x3 scalar-lrintf kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28502
// Runs the up2x3 scalar-lrintf kernel for channel counts below cr = 2,
// which is the single value 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t c = 1; c <= 1; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28512
// Runs the up2x3 scalar-lrintf kernel for the loop range [3, 4), i.e. the
// single channel count 3 (above cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28522
// Runs the up2x3 scalar-lrintf kernel for the single channel count 3
// (above cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28533
// Runs the up2x3 scalar-lrintf kernel for the single channel count 3
// (above cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28544
// Runs the up2x3 scalar-lrintf kernel with width 3 for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28555
// Runs the up2x3 scalar-lrintf kernel with width 3 for every pairing of
// channel count 1..10 and step 2..3.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 4; ++s) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28569
// Runs the up2x3 scalar-lrintf kernel with width 5 and output_stride 13 for
// every channel count from 1 through 10. The loop variable is forwarded to
// channels() so each iteration exercises a different channel count; the stride
// of 13 is larger than the largest channel count used here.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change belongs in the generator so it survives regeneration.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(3)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
28581
// Runs the up2x3 scalar-lrintf kernel with width 3 and qmin 128 for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28593
// Runs the up2x3 scalar-lrintf kernel with width 3 and qmax 128 for
// channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28605
// Runs the up2x3 scalar-lrintf kernel with input_offset 80 for channel
// counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28616
// Runs the up2x3 scalar-lrintf kernel with input_offset 80 for every
// zero_index 0..2 combined with channel counts 4, 10, 16, 22 and 28.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X3__SCALAR_LRINTF, zero) {
  for (uint32_t zero_slot = 0; zero_slot <= 2; ++zero_slot) {
    for (uint32_t c = 4; c <= 28; c += 6) {
      auto tester =
          DWConvMicrokernelTester().cr(2).kr(3).channels(c).input_offset(80).zero_index(zero_slot);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28630
// Single run of the up2x9 scalar-fmagic kernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
  auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(2);
  tester.Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
28638
// Runs the up2x9 scalar-fmagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28648
// Runs the up2x9 scalar-fmagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28659
// Runs the up2x9 scalar-fmagic kernel for channel counts 4, 10, 16, 22 and 28
// (all multiples of cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c <= 28; c += 6) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28670
// Runs the up2x9 scalar-fmagic kernel for channel counts below cr = 2,
// which is the single value 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c <= 1; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28680
// Runs the up2x9 scalar-fmagic kernel for the loop range [3, 4), i.e. the
// single channel count 3 (above cr = 2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28690
// Runs the up2x9 scalar-fmagic kernel for the single channel count 3
// (above cr = 2) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28701
// Runs the up2x9 scalar-fmagic kernel for the single channel count 3
// (above cr = 2) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c <= 3; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28712
// Runs the up2x9 scalar-fmagic kernel with width 3 for channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c < 11; ++c) {
    auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28723
// Runs the up2x9 scalar-fmagic kernel with width 3 for every pairing of
// channel count 1..10 and step 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c < 11; ++c) {
    for (size_t s = 2; s < 10; ++s) {
      auto tester = DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28737
// Width-5 run of the up2x9 scalar fmagic microkernel with an output stride of
// 13, swept over channel counts 1..10.
// channels() must receive the loop variable: a fixed channels(2) here would
// make all ten iterations exercise the same channel count.
// The stride of 13 is larger than the biggest channel count in the sweep (10).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
28749
// Width-3 runs over channel counts 1..10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28761
// Width-3 runs over channel counts 1..10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28773
// Channel counts 4, 10, 16, 22, 28 with the tester's input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28784
// For each zero_index 0..8, runs channel counts 4, 10, 16, 22, 28 with input_offset 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot < 9; ++zero_slot) {
    for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80).zero_index(zero_slot)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28798
// Single run of the up2x9 scalar imagic microkernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
28806
// Channel counts 4, 10, 16, 22, 28: all multiples of cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28816
// Channel counts 4, 10, 16, 22, 28 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28827
// Channel counts 4, 10, 16, 22, 28 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28838
// Runs the up2x9 scalar imagic microkernel with 1 channel, fewer than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t n_ch = 1; n_ch < 2; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28848
// Runs the up2x9 scalar imagic microkernel with 3 channels, one more than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28858
// Same 3-channel case (one above cr = 2), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28869
// Same 3-channel case (one above cr = 2), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28880
// Width-3 run of the up2x9 scalar imagic microkernel for every channel count from 1 to 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28891
// Width-3 runs over channel counts 1..10 combined with step values 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    for (size_t cur_step = 2; cur_step <= 9; ++cur_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).step(cur_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28905
// Width-5 run of the up2x9 scalar imagic microkernel with an output stride of
// 13, swept over channel counts 1..10.
// channels() must receive the loop variable: a fixed channels(2) here would
// make all ten iterations exercise the same channel count.
// The stride of 13 is larger than the biggest channel count in the sweep (10).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
28917
// Width-3 runs over channel counts 1..10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28929
// Width-3 runs over channel counts 1..10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28941
// Channel counts 4, 10, 16, 22, 28 with the tester's input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
28952
// For each zero_index 0..8, runs channel counts 4, 10, 16, 22, 28 with input_offset 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot < 9; ++zero_slot) {
    for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80).zero_index(zero_slot)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28966
// Single run of the up2x9 scalar lrintf microkernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
28974
// Channel counts 4, 10, 16, 22, 28: all multiples of cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28984
// Channel counts 4, 10, 16, 22, 28 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
28995
// Channel counts 4, 10, 16, 22, 28 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29006
// Runs the up2x9 scalar lrintf microkernel with 1 channel, fewer than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t n_ch = 1; n_ch < 2; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29016
// Runs the up2x9 scalar lrintf microkernel with 3 channels, one more than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29026
// Same 3-channel case (one above cr = 2), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29037
// Same 3-channel case (one above cr = 2), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29048
// Width-3 run of the up2x9 scalar lrintf microkernel for every channel count from 1 to 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29059
// Width-3 runs over channel counts 1..10 combined with step values 2..9.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    for (size_t cur_step = 2; cur_step <= 9; ++cur_step) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).step(cur_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29073
// Width-5 run of the up2x9 scalar lrintf microkernel with an output stride of
// 13, swept over channel counts 1..10.
// channels() must receive the loop variable: a fixed channels(2) here would
// make all ten iterations exercise the same channel count.
// The stride of 13 is larger than the biggest channel count in the sweep (10).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
29085
// Width-3 runs over channel counts 1..10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29097
// Width-3 runs over channel counts 1..10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29109
// Channel counts 4, 10, 16, 22, 28 with the tester's input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29120
// For each zero_index 0..8, runs channel counts 4, 10, 16, 22, 28 with input_offset 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_slot = 0; zero_slot < 9; ++zero_slot) {
    for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(n_ch).input_offset(80).zero_index(zero_slot)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29134
// Single run of the up2x25 scalar fmagic microkernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
29142
// Channel counts 4, 10, 16, 22, 28: all multiples of cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29152
// Channel counts 4, 10, 16, 22, 28 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29163
// Channel counts 4, 10, 16, 22, 28 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29174
// Runs the up2x25 scalar fmagic microkernel with 1 channel, fewer than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t n_ch = 1; n_ch < 2; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29184
// Runs the up2x25 scalar fmagic microkernel with 3 channels, one more than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29194
// Same 3-channel case (one above cr = 2), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29205
// Same 3-channel case (one above cr = 2), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29216
// Width-3 run of the up2x25 scalar fmagic microkernel for every channel count from 1 to 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29227
// Width-3 runs over channel counts 1..10 combined with step values 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    for (size_t cur_step = 2; cur_step <= 25; ++cur_step) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).step(cur_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29241
// Width-5 run of the up2x25 scalar fmagic microkernel with an output stride of
// 13, swept over channel counts 1..10.
// channels() must receive the loop variable: a fixed channels(2) here would
// make all ten iterations exercise the same channel count.
// The stride of 13 is larger than the biggest channel count in the sweep (10).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
29253
// Width-3 runs over channel counts 1..10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29265
// Width-3 runs over channel counts 1..10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29277
// Channel counts 4, 10, 16, 22, 28 with the tester's input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29288
// For each zero_index 0..24, runs channel counts 4, 10, 16, 22, 28 with input_offset 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot < 25; ++zero_slot) {
    for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).input_offset(80).zero_index(zero_slot)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29302
// Single run of the up2x25 scalar imagic microkernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
29310
// Channel counts 4, 10, 16, 22, 28: all multiples of cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29320
// Channel counts 4, 10, 16, 22, 28 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29331
// Channel counts 4, 10, 16, 22, 28 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29342
// Runs the up2x25 scalar imagic microkernel with 1 channel, fewer than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t n_ch = 1; n_ch < 2; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29352
// Runs the up2x25 scalar imagic microkernel with 3 channels, one more than cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29362
// Same 3-channel case (one above cr = 2), with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29373
// Same 3-channel case (one above cr = 2), with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t n_ch = 3; n_ch < 4; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29384
// Width-3 run of the up2x25 scalar imagic microkernel for every channel count from 1 to 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29395
// Width-3 runs over channel counts 1..10 combined with step values 2..25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    for (size_t cur_step = 2; cur_step <= 25; ++cur_step) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).step(cur_step)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29409
// Width-5 run of the up2x25 scalar imagic microkernel with an output stride of
// 13, swept over channel counts 1..10.
// channels() must receive the loop variable: a fixed channels(2) here would
// make all ten iterations exercise the same channel count.
// The stride of 13 is larger than the biggest channel count in the sweep (10).
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
29421
// Width-3 runs over channel counts 1..10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29433
// Width-3 runs over channel counts 1..10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t n_ch = 1; n_ch <= 10; ++n_ch) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).width(3).qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29445
// Channel counts 4, 10, 16, 22, 28 with the tester's input_offset set to 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).input_offset(80)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29456
// For each zero_index 0..24, runs channel counts 4, 10, 16, 22, 28 with input_offset 80.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_slot = 0; zero_slot < 25; ++zero_slot) {
    for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch).input_offset(80).zero_index(zero_slot)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29470
// Single run of the up2x25 scalar lrintf microkernel with channels equal to cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
29478
// Channel counts 4, 10, 16, 22, 28: all multiples of cr (2).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
  for (uint32_t n_ch = 4; n_ch < 32; n_ch += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(n_ch)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29488
// Sweeps channel counts 4, 10, 16, 22, 28 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t channel_count = 4; channel_count < 32; channel_count += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29499
// Sweeps channel counts 4, 10, 16, 22, 28 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t channel_count = 4; channel_count < 32; channel_count += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29510
// Channel counts below cr = 2; the loop bounds admit only the value 1.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t channel_count = 1; channel_count < 2; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29520
// Channel counts strictly between cr = 2 and 2 * cr; only the value 3 qualifies.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t channel_count = 3; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29530
// Channel count 3 (the only value in [3, 4)) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t channel_count = 3; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29541
// Channel count 3 (the only value in [3, 4)) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t channel_count = 3; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29552
// Width-3 runs over channel counts 1 through 10.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
  for (size_t channel_count = 1; channel_count <= 10; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29563
// Width-3 runs over channel counts 1 through 10, each combined with every
// step value from 2 through 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 10; ++channel_count) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(25).channels(channel_count)
          .width(3).step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29577
// Width-5 runs with output_stride(13), swept over channel counts 1 through 10.
//
// The loop variable is now forwarded to channels(). Previously channels was
// pinned to 2, so the loop variable was unused and every iteration ran the
// same configuration; other channel counts were never combined with a custom
// output stride. The stride of 13 is larger than every channel count in the
// sweep (maximum 10).
// NOTE(review): this file is auto-generated; apply the same change in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
29589
// Width-3 runs over channel counts 1 through 10 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 10; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29601
// Width-3 runs over channel counts 1 through 10 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 10; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29613
// Sweeps channel counts 4, 10, 16, 22, 28 with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t channel_count = 4; channel_count < 32; channel_count += 6) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(2).kr(25).channels(channel_count).input_offset(80).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29624
// For each zero_index in 0..24, sweeps channel counts 4, 10, 16, 22, 28
// with input_offset(80).
TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 4; channel_count < 32; channel_count += 6) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(2).kr(25).channels(channel_count)
          .input_offset(80).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29638
// Single run with the channel count equal to cr (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
29646
// Sweeps channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29656
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29667
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29678
// Channel counts below cr = 4: 1, 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29688
// Channel counts strictly between cr = 4 and 2 * cr: 5, 6 and 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29698
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29709
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29720
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29731
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .width(3).step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29745
// Width-5 runs with output_stride(23), swept over channel counts
// 1, 4, 7, 10, 13, 16, 19.
//
// The loop variable is now forwarded to channels(). Previously channels was
// pinned to 4, so the loop variable was unused and every iteration ran the
// same configuration; other channel counts were never combined with a custom
// output stride. The stride of 23 is larger than every channel count in the
// sweep (maximum 19).
// NOTE(review): this file is auto-generated; apply the same change in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
29757
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29769
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29781
// Sweeps channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).input_offset(112).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29792
// For each zero_index in 0..8, sweeps channel counts 8, 20, 32, 44, 56
// with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .input_offset(112).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29806
// Single run with the channel count equal to cr (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
29814
// Sweeps channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29824
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29835
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29846
// Channel counts below cr = 4: 1, 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29856
// Channel counts strictly between cr = 4 and 2 * cr: 5, 6 and 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29866
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29877
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29888
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29899
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .width(3).step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29913
// Width-5 runs with output_stride(23), swept over channel counts
// 1, 4, 7, 10, 13, 16, 19.
//
// The loop variable is now forwarded to channels(). Previously channels was
// pinned to 4, so the loop variable was unused and every iteration ran the
// same configuration; other channel counts were never combined with a custom
// output stride. The stride of 23 is larger than every channel count in the
// sweep (maximum 19).
// NOTE(review): this file is auto-generated; apply the same change in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
29925
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29937
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29949
// Sweeps channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).input_offset(112).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
29960
// For each zero_index in 0..8, sweeps channel counts 8, 20, 32, 44, 56
// with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .input_offset(112).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29974
// Single run with the channel count equal to cr (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(9).channels(4).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
29982
// Sweeps channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
29992
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30003
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30014
// Channel counts below cr = 4: 1, 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30024
// Channel counts strictly between cr = 4 and 2 * cr: 5, 6 and 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30034
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30045
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30056
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30067
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19, each combined
// with every step value from 2 through 9.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .width(3).step(step_size)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30081
// Width-5 runs with output_stride(23), swept over channel counts
// 1, 4, 7, 10, 13, 16, 19.
//
// The loop variable is now forwarded to channels(). Previously channels was
// pinned to 4, so the loop variable was unused and every iteration ran the
// same configuration; other channel counts were never combined with a custom
// output stride. The stride of 23 is larger than every channel count in the
// sweep (maximum 19).
// NOTE(review): this file is auto-generated; apply the same change in
// tools/generate-dwconv-test.py so that regeneration does not revert it.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
30093
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30105
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19 with the tester's
// qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).width(3).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30117
// Sweeps channel counts 8, 20, 32, 44, 56 with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(9).channels(channel_count).input_offset(112).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30128
// For each zero_index in 0..8, sweeps channel counts 8, 20, 32, 44, 56
// with input_offset(112).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(4).kr(9).channels(channel_count)
          .input_offset(112).zero_index(zero_idx)
          .Test(
              xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30142
// Single run with the channel count equal to cr (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(4).kr(25).channels(4).Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
30150
// Sweeps channel counts 8, 20, 32, 44, 56 (all multiples of cr = 4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30160
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30171
// Sweeps channel counts 8, 20, 32, 44, 56 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t channel_count = 8; channel_count < 64; channel_count += 12) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30182
// Channel counts below cr = 4: 1, 2 and 3.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30192
// Channel counts strictly between cr = 4 and 2 * cr: 5, 6 and 7.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30202
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30213
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30224
// Width-3 runs over channel counts 1, 4, 7, 10, 13, 16, 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
  for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(4).kr(25).channels(channel_count).width(3).Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30235
// Runs the kernel with width = 3 for channel counts 1, 4, 7, ..., 19 and,
// for each of them, every tester step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30249
// Runs the kernel with width = 5 and an output stride of 23 for channel
// counts 1, 4, 7, ..., 19.
//
// The loop variable is forwarded to channels(): the previous body hard-coded
// channels(4), so all seven iterations repeated one identical configuration
// and the sweep never varied the channel count. The stride of 23 is larger
// than every channel count visited by the loop (max 19).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30261
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30273
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30285
// Runs the kernel with an input offset of 112 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .input_offset(112)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30296
// For every zero_index in [0, 25) (one per kr position), runs the kernel
// with an input offset of 112 for channel counts 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .input_offset(112)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30310
// Single run of the up4x25 scalar-imagic kernel with the channel count equal
// to the cr value (4) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
  DWConvMicrokernelTester().cr(4).kr(25).channels(4)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
30318
// Runs the up4x25 scalar-imagic kernel for channel counts 8, 20, 32, 44 and
// 56, all of which are multiples of the cr value (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30328
// Same channel sweep as c_div_4 (8, 20, 32, 44, 56) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30339
// Same channel sweep as c_div_4 (8, 20, 32, 44, 56) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30350
// Runs the up4x25 scalar-imagic kernel (cr = 4, kr = 25) for channel counts
// 1, 2 and 3, i.e. every count strictly below the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30360
// Runs the up4x25 scalar-imagic kernel (cr = 4, kr = 25) for channel counts
// 5, 6 and 7, i.e. counts strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30370
// Same channel sweep as c_gt_4 (5, 6, 7) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30381
// Same channel sweep as c_gt_4 (5, 6, 7) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30392
// Runs the kernel with width = 3 for channel counts 1, 4, 7, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30403
// Runs the kernel with width = 3 for channel counts 1, 4, 7, ..., 19 and,
// for each of them, every tester step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30417
// Runs the kernel with width = 5 and an output stride of 23 for channel
// counts 1, 4, 7, ..., 19.
//
// The loop variable is forwarded to channels(): the previous body hard-coded
// channels(4), so all seven iterations repeated one identical configuration
// and the sweep never varied the channel count. The stride of 23 is larger
// than every channel count visited by the loop (max 19).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30429
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30441
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30453
// Runs the kernel with an input offset of 112 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .input_offset(112)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
30464
// For every zero_index in [0, 25) (one per kr position), runs the kernel
// with an input offset of 112 for channel counts 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .input_offset(112)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30478
// Single run of the up4x25 scalar-lrintf kernel with the channel count equal
// to the cr value (4) and kr = 25.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
  DWConvMicrokernelTester().cr(4).kr(25).channels(4)
    .Test(
      xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
30486
// Runs the up4x25 scalar-lrintf kernel for channel counts 8, 20, 32, 44 and
// 56, all of which are multiples of the cr value (4).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30496
// Same channel sweep as c_div_4 (8, 20, 32, 44, 56) with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30507
// Same channel sweep as c_div_4 (8, 20, 32, 44, 56) with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30518
// Runs the up4x25 scalar-lrintf kernel (cr = 4, kr = 25) for channel counts
// 1, 2 and 3, i.e. every count strictly below the cr value.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30528
// Runs the up4x25 scalar-lrintf kernel (cr = 4, kr = 25) for channel counts
// 5, 6 and 7, i.e. counts strictly between cr and 2 * cr.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30538
// Same channel sweep as c_gt_4 (5, 6, 7) with the tester's qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30549
// Same channel sweep as c_gt_4 (5, 6, 7) with the tester's qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30560
// Runs the kernel with width = 3 for channel counts 1, 4, 7, ..., 19.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30571
// Runs the kernel with width = 3 for channel counts 1, 4, 7, ..., 19 and,
// for each of them, every tester step value from 2 through 25 (= kr).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30585
// Runs the kernel with width = 5 and an output stride of 23 for channel
// counts 1, 4, 7, ..., 19.
//
// The loop variable is forwarded to channels(): the previous body hard-coded
// channels(4), so all seven iterations repeated one identical configuration
// and the sweep never varied the channel count. The stride of 23 is larger
// than every channel count visited by the loop (max 19).
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30597
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmin set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30609
// Width-3 sweep over channel counts 1, 4, 7, ..., 19 with qmax set to 128.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30621
// Runs the kernel with an input offset of 112 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .input_offset(112)
      .Test(
        xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
30632
// For every zero_index in [0, 25) (one per kr position), runs the kernel
// with an input offset of 112 for channel counts 8, 20, 32, 44 and 56.
TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .input_offset(112)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}