1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile (m == mr, n == nr) with k fixed at 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
40
// Full 1x8 tile with k = 8, run with cn_stride explicitly set to 11
// (larger than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
54
// Sweeps every (m, n) with m in [1, 1] and n in [1, 8] at k = 8, one
// iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    // mr is 1, so the m loop has a single trip.
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
72
// Sweeps m over [1, 1] with n = 8 and k = 8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so this loop has a single trip.
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
88
// Sweeps n over [1, 8] with m = 1 and k = 8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
104
// Full 1x8 tile for every k in [1, 7], i.e. all k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
119
// For every k in [1, 7], sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
139
// Full 1x8 tile for every k in [9, 15], i.e. k strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
154
// For every k in [9, 15], sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
174
// Full 1x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
189
// For k = 16, 24, ..., 80, sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
209
// n in [9, 15] (wider than nr = 8) with m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
226
// n in [9, 15] with m = 1 and cn_stride set to 11, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
244
// n in [9, 15], k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
264
// n in {16, 24} (multiples of nr = 8) with m = 1, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
281
// n in {16, 24} with m = 1 and cn_stride set to 11, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
299
// n in {16, 24}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
319
// Full 1x8 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
335
// ks = 3 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1]; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
356
// ks = 3 with n in [9, 15] and m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
374
// ks = 3 with n in {16, 24} and m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
392
// cm_stride = 11 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1];
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
413
// Full 1x8 tile with ks = 3 and a_offset = 43, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
430
// Same setup as the a_offset case (ks = 3, a_offset = 43), additionally
// passing each zero_index in [0, 1) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    // The bound matches mr = 1, so only mz == 0 is exercised.
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
450
// Full 1x8 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
464
// Full 1x8 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
478
// Full 1x8 tile with k = 8 and cm_stride explicitly set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
492 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
493
494
495 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile (m == mr, n == nr) with k fixed at 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
508
// Full 1x8 tile with k = 8, run with cn_stride explicitly set to 11
// (larger than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
522
// Sweeps every (m, n) with m in [1, 1] and n in [1, 8] at k = 8, one
// iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    // mr is 1, so the m loop has a single trip.
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
540
// Sweeps m over [1, 1] with n = 8 and k = 8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // mr is 1, so this loop has a single trip.
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
556
// Sweeps n over [1, 8] with m = 1 and k = 8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
572
// Full 1x8 tile for every k in [1, 7], i.e. all k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
587
// For every k in [1, 7], sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
607
// Full 1x8 tile for every k in [9, 15], i.e. k strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
622
// For every k in [9, 15], sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
642
// Full 1x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
657
// For k = 16, 24, ..., 80, sweeps m in [1, 1] and n in [1, 8], one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
677
// n in [9, 15] (wider than nr = 8) with m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
694
// n in [9, 15] with m = 1 and cn_stride set to 11, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
712
// n in [9, 15], k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
732
// n in {16, 24} (multiples of nr = 8) with m = 1, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
749
// n in {16, 24} with m = 1 and cn_stride set to 11, for k in
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
767
// n in {16, 24}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
787
// Full 1x8 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
803
// ks = 3 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1]; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
824
// ks = 3 with n in [9, 15] and m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
842
// ks = 3 with n in {16, 24} and m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
860
// cm_stride = 11 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1];
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // mr is 1, so the m loop has a single trip.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
881
// Full 1x8 tile with ks = 3 and a_offset = 43, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
898
// Same setup as the a_offset case (ks = 3, a_offset = 43), additionally
// passing each zero_index in [0, 1) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    // The bound matches mr = 1, so only mz == 0 is exercised.
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
918
// Full 1x8 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
932
// Full 1x8 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
946
// Full 1x8 tile with k = 8 and cm_stride explicitly set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
960 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
961
962
963 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m == mr, n == nr) with k fixed at 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
976
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11 (larger than nr).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
990
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 8 at k = 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1008
// Varies m over 1..4 while n = 8 and k = 8 stay fixed; one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1024
// Varies n over 1..8 while m = 4 and k = 8 stay fixed; one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1040
// Runs m = 4, n = 8 for every k in 1..7 (all values below 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1055
// For every k in 1..7, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075
// Runs m = 4, n = 8 for every k in 9..15 (strictly between 8 and 16).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1090
// For every k in 9..15, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110
// Runs m = 4, n = 8 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1125
// For k = 16, 24, ..., 80, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145
// Runs m = 4 for every n in 9..15, each with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1162
// Same sweep as n in 9..15 and k = 1, 10, ..., 37 with m = 4, but with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1180
// For n in 9..15 and k = 1, 10, ..., 37, varies m over 1..4;
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200
// Runs m = 4 for n = 16 and n = 24 (multiples of 8), each with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1217
// Sweep over n = 16, 24 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1235
// For n = 16, 24 and k = 1, 10, ..., 37, varies m over 1..4;
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255
// Runs m = 4, n = 8 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1271
// With ks set to 3, sweeps k = 1, 10, ..., 37 and all (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292
// With ks set to 3 and m = 4, sweeps n in 9..15 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1310
// With ks set to 3 and m = 4, sweeps n = 16, 24 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1328
// With cm_stride set to 11, sweeps k = 1, 10, ..., 37 and all (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349
// Runs m = 4, n = 8 with ks = 3 and a_offset = 163 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1366
// With ks = 3 and a_offset = 163, sweeps k = 1, 10, ..., 37 and passes each
// value 0..3 to zero_index in turn.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1386
// Single run with m = 4, n = 8, k = 8 and the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1400
// Single run with m = 4, n = 8, k = 8 and the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1414
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11 (larger than n).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1428 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1429
1430
1431 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = mr = 4, n = nr = 8 and k = 8; no other tester option is set.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1444
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11 (larger than nr).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1458
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 8 at k = 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1476
// Varies m over 1..4 while n = 8 and k = 8 stay fixed; one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1492
// Varies n over 1..8 while m = 4 and k = 8 stay fixed; one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1508
// Runs m = 4, n = 8 for every k in 1..7 (all values below 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1523
// For every k in 1..7, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543
// Runs m = 4, n = 8 for every k in 9..15 (strictly between 8 and 16).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1558
// For every k in 9..15, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578
// Runs m = 4, n = 8 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1593
// For k = 16, 24, ..., 80, sweeps all (m, n) with 1 <= m <= 4 and 1 <= n <= 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613
// Runs m = 4 for every n in 9..15, each with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1630
// Sweep over n in 9..15 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1648
// For n in 9..15 and k = 1, 10, ..., 37, varies m over 1..4;
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668
// Runs m = 4 for n = 16 and n = 24 (multiples of 8), each with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1685
// Sweep over n = 16, 24 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1703
// For n = 16, 24 and k = 1, 10, ..., 37, varies m over 1..4;
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723
// Runs m = 4, n = 8 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1739
// With ks set to 3, sweeps k = 1, 10, ..., 37 and all (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760
// With ks set to 3 and m = 4, sweeps n in 9..15 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1778
// With ks set to 3 and m = 4, sweeps n = 16, 24 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1796
// With cm_stride set to 11, sweeps k = 1, 10, ..., 37 and all (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817
// Runs m = 4, n = 8 with ks = 3 and a_offset = 163 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1834
// With ks = 3 and a_offset = 163, sweeps k = 1, 10, ..., 37 and passes each
// value 0..3 to zero_index in turn.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1854
// Single run with m = 4, n = 8, k = 8 and the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1868
// Single run with m = 4, n = 8, k = 8 and the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1882
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11 (larger than n).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1896 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1897
1898
1899 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Single run with m = mr = 4, n = nr = 8, kr = 4 and k = 8; no other tester option is set.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1912
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11 (larger than nr).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1926
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 8 at k = 8,
// one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1944
// Varies m over 1..4 while n = 8 and k = 8 stay fixed; one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1960
// Sweeps n over [1, 8] with m = 4 and k = 8, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(nc).k(8)          // problem size
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1976
// Runs every k in [1, 7] with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; kc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1991
// For every k in [1, 7], sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011
// Runs every k in [9, 15] with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; kc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2026
// For every k in [9, 15], sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046
// Runs k = 16, 24, ..., 80 (step 8) with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2061
// For k = 16, 24, ..., 80 (step 8), sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2098
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2116
// For every n in [9, 15] and k = 1, 10, 19, 28, 37, sweeps m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2153
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2171
// For n = 16 and 24 and k = 1, 10, 19, 28, 37, sweeps m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191
// Runs k = 1, 10, 19, 28, 37 with m = 4, n = 8 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2207
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 4]
// with ks set to 3 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4
// and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2246
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4
// and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2264
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 4]
// with cm_stride set to 11 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285
// Runs k = 1, 10, 19, 28, 37 with m = 4, n = 8, ks set to 3
// and a_offset set to 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2302
// For k = 1, 10, 19, 28, 37, runs zero_index = 0..3 with m = 4, n = 8,
// ks set to 3 and a_offset set to 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; zi++) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(8).k(kc)          // problem size
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2322
// Single run with m = 4, n = 8, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2336
// Single run with m = 4, n = 8, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2350
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2364 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
2365
2366
2367 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Single run with m = 4, n = 8, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2380
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2394
// Sweeps n over [1, 8] (outer) and m over [1, 4] (inner) at k = 8,
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(mc).n(nc).k(8)         // problem size
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2412
// Sweeps m over [1, 4] with n = 8 and k = 8, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(mc).n(8).k(8)          // problem size
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2428
// Sweeps n over [1, 8] with m = 4 and k = 8, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(nc).k(8)          // problem size
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2444
// Runs every k in [1, 7] with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; kc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2459
// For every k in [1, 7], sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479
// Runs every k in [9, 15] with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; kc++) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2494
// For every k in [9, 15], sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514
// Runs k = 16, 24, ..., 80 (step 8) with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2529
// For k = 16, 24, ..., 80 (step 8), sweeps n over [1, 8] and m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2566
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2584
// For every n in [9, 15] and k = 1, 10, 19, 28, 37, sweeps m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2621
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2639
// For n = 16 and 24 and k = 1, 10, 19, 28, 37, sweeps m over [1, 4],
// with iterations set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659
// Runs k = 1, 10, 19, 28, 37 with m = 4, n = 8 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2675
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 4]
// with ks set to 3 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696
// For every n in [9, 15], runs k = 1, 10, 19, 28, 37 with m = 4
// and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2714
// For n = 16 and 24, runs k = 1, 10, 19, 28, 37 with m = 4
// and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(nc).k(kc)         // problem size
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2732
// For k = 1, 10, 19, 28, 37, sweeps n over [1, 8] and m over [1, 4]
// with cm_stride set to 11 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753
// Runs k = 1, 10, 19, 28, 37 with m = 4, n = 8, ks set to 3
// and a_offset set to 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)  // tile parameters
        .m(4).n(8).k(kc)          // problem size
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2770
// For k = 1, 10, 19, 28, 37, runs zero_index = 0..3 with m = 4, n = 8,
// ks set to 3 and a_offset set to 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; zi++) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)  // tile parameters
          .m(4).n(8).k(kc)          // problem size
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2790
// Single run with m = 4, n = 8, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2804
// Single run with m = 4, n = 8, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2818
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)  // tile parameters
      .m(4).n(8).k(8)           // problem size
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2832 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
2833
2834
2835 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)  // tile parameters
      .m(1).n(8).k(16)          // problem size
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2848
// Single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)  // tile parameters
      .m(1).n(8).k(16)          // problem size
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
2862
// Sweeps n over [1, 8] at k = 16 with iterations set to 1.
// The inner m loop has bounds [1, 1], so it runs once per n.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 1; mc++) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)  // tile parameters
          .m(mc).n(nc).k(16)        // problem size
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
2880
// Runs n = 8, k = 16 with iterations set to 1.
// The m loop has bounds [1, 1], so it executes a single time.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; mc++) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)  // tile parameters
        .m(mc).n(8).k(16)         // problem size
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2896
// Sweeps n over [1, 8] with m = 1 and k = 16, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)  // tile parameters
        .m(1).n(nc).k(16)         // problem size
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2912
// Runs every k in [1, 15] with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; kc++) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)  // tile parameters
        .m(1).n(8).k(kc)          // problem size
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
2927
// For every k in [1, 15], sweeps n over [1, 8] with iterations set to 1.
// The innermost m loop has bounds [1, 1], so it runs once per (k, n).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)  // tile parameters
            .m(mc).n(nc).k(kc)        // problem size
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947
// Runs the 1x8c8 PRFM kernel at m=1, n=8 for every k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2962
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 1] on the 1x8c8 PRFM kernel,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982
// Runs the 1x8c8 PRFM kernel at m=1, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2997
// Sweeps k = 32..160 (step 16), n in [1, 8] and m in [1, 1] on the 1x8c8 PRFM
// kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017
// Runs the 1x8c8 PRFM kernel at m=1 for n in [9, 16) and k = 1, 18, ..., 69 (step 17).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3034
// Same sweep as n_gt_8 (n in [9, 16), k from 1 to 80 in steps of 17) with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3052
// Sweeps n in [9, 16), k from 1 to 80 in steps of 17, and m in [1, 1] on the
// 1x8c8 PRFM kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072
// Runs the 1x8c8 PRFM kernel at m=1 for n = 16 and 24, with k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3089
// Same sweep as n_div_8 (n = 16 and 24, k from 1 to 80 in steps of 17) with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3107
// Sweeps n = 16 and 24, k from 1 to 80 in steps of 17, and m in [1, 1] on the
// 1x8c8 PRFM kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127
// Runs the 1x8c8 PRFM kernel at m=1, n=8 with ks set to 3, for k from 1 to 80
// in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3143
// Sweeps k from 1 to 80 in steps of 17, n in [1, 8] and m in [1, 1] with ks
// set to 3 on the 1x8c8 PRFM kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164
// Runs the 1x8c8 PRFM kernel at m=1 with ks set to 3, for n in [9, 16) and k
// from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3182
// Runs the 1x8c8 PRFM kernel at m=1 with ks set to 3, for n = 16 and 24 and k
// from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3200
// Sweeps k from 1 to 80 in steps of 17, n in [1, 8] and m in [1, 1] with
// cm_stride set to 11 on the 1x8c8 PRFM kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221
// Runs the 1x8c8 PRFM kernel at m=1, n=8 with ks set to 3 and a_offset set to
// 83, for k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3238
// Runs the 1x8c8 PRFM kernel at m=1, n=8 with ks set to 3 and a_offset set to
// 83, passing each zero_index in [0, 1) for k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3258
// Single run of the 1x8c8 PRFM kernel at m=1, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3272
// Single run of the 1x8c8 PRFM kernel at m=1, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3286
// Single run of the 1x8c8 PRFM kernel at m=1, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3300 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3301
3302
3303 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the 2x8c8 MLAL kernel on the full tile: m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3316
// Single run of the 2x8c8 MLAL kernel at m=2, n=8, k=16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3330
// Sweeps n in [1, 8] and m in [1, 2] at k=16 on the 2x8c8 MLAL kernel, one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3348
// Runs the 2x8c8 MLAL kernel at n=8, k=16 for each m in [1, 2], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3364
// Runs the 2x8c8 MLAL kernel at m=2, k=16 for each n in [1, 8], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3380
// Runs the 2x8c8 MLAL kernel at m=2, n=8 for every k in [1, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3395
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL kernel,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415
// Runs the 2x8c8 MLAL kernel at m=2, n=8 for every k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3430
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL kernel,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450
// Runs the 2x8c8 MLAL kernel at m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3465
// Sweeps k = 32..160 (step 16), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL
// kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485
// Runs the 2x8c8 MLAL kernel at m=2 for n in [9, 16) and k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3502
// Same sweep as n_gt_8 (n in [9, 16), k from 1 to 80 in steps of 17) with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3520
// Sweeps n in [9, 16), k from 1 to 80 in steps of 17, and m in [1, 2] on the
// 2x8c8 MLAL kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540
// Runs the 2x8c8 MLAL kernel at m=2 for n = 16 and 24, with k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3557
// Same sweep as n_div_8 (n = 16 and 24, k from 1 to 80 in steps of 17) with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3575
// Sweeps n = 16 and 24, k from 1 to 80 in steps of 17, and m in [1, 2] on the
// 2x8c8 MLAL kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595
// Runs the 2x8c8 MLAL kernel at m=2, n=8 with ks set to 3, for k from 1 to 80
// in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3611
// Sweeps k from 1 to 80 in steps of 17, n in [1, 8] and m in [1, 2] with ks
// set to 3 on the 2x8c8 MLAL kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632
// Runs the 2x8c8 MLAL kernel at m=2 with ks set to 3, for n in [9, 16) and k
// from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3650
// Runs the 2x8c8 MLAL kernel at m=2 with ks set to 3, for n = 16 and 24 and k
// from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3668
// Sweeps k from 1 to 80 in steps of 17, n in [1, 8] and m in [1, 2] with
// cm_stride set to 11 on the 2x8c8 MLAL kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689
// Runs the 2x8c8 MLAL kernel at m=2, n=8 with ks set to 3 and a_offset set to
// 163, for k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3706
// Runs the 2x8c8 MLAL kernel at m=2, n=8 with ks set to 3 and a_offset set to
// 163, passing each zero_index in [0, 2) for k from 1 to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(8).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3726
// Single run of the 2x8c8 MLAL kernel at m=2, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3740
// Single run of the 2x8c8 MLAL kernel at m=2, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3754
// Single run of the 2x8c8 MLAL kernel at m=2, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3768 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3769
3770
3771 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the 2x8c8 MLAL Cortex-A53 kernel on the full tile: m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3784
// Single run of the 2x8c8 MLAL Cortex-A53 kernel at m=2, n=8, k=16 with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3798
// Sweeps n in [1, 8] and m in [1, 2] at k=16 on the 2x8c8 MLAL Cortex-A53
// kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3816
// Runs the 2x8c8 MLAL Cortex-A53 kernel at n=8, k=16 for each m in [1, 2],
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3832
// Runs the 2x8c8 MLAL Cortex-A53 kernel at m=2, k=16 for each n in [1, 8],
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3848
// Runs the 2x8c8 MLAL Cortex-A53 kernel at m=2, n=8 for every k in [1, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3863
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL
// Cortex-A53 kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883
// Runs the 2x8c8 MLAL Cortex-A53 kernel at m=2, n=8 for every k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3898
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL
// Cortex-A53 kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918
// Runs the 2x8c8 MLAL Cortex-A53 kernel at m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3933
// Sweeps k = 32..160 (step 16), n in [1, 8] and m in [1, 2] on the 2x8c8 MLAL
// Cortex-A53 kernel, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953
// Runs the 2x8c8 MLAL Cortex-A53 kernel at m=2 for n in [9, 16) and k from 1
// to 80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3970
// Runs m = 2 with n = 9..15 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3988
// Sweeps n = 9..15, k = 1, 18, 35, 52, 69 and m = 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008
// Runs m = 2 with n = 16 and 24 (multiples of nr = 8) and k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4025
// Runs m = 2 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4043
// Sweeps n = 16 and 24, k = 1, 18, 35, 52, 69 and m = 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063
// Runs m = 2, n = 8 with ks set to 3 for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4079
// Sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2 with ks set to 3,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100
// Runs m = 2 with n = 9..15 and k = 1, 18, 35, 52, 69, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4118
// Runs m = 2 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4136
// Sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2 with cm_stride set to 11,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157
// Runs m = 2, n = 8 with ks set to 3 and a_offset set to 163
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4174
// Runs m = 2, n = 8 with ks set to 3 and a_offset set to 163 for
// k = 1, 18, 35, 52, 69, once per zero_index value mz = 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4194
// Runs the full tile (m = 2, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4208
// Runs the full tile (m = 2, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4222
// Runs the full tile (m = 2, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4236 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4237
4238
4239 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the full tile once: m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4252
// Runs the full tile (m = 2, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4266
// With k fixed at 16, runs every n in 1..8 and m in 1..2,
// a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4284
// With n = 8 and k = 16 fixed, runs m = 1..2, a single iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4300
// With m = 2 and k = 16 fixed, runs n = 1..8, a single iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(n).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4316
// Runs m = 2, n = 8 for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4331
// Sweeps k = 1..15 and, for each k, every n in 1..8 and m in 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351
// Runs m = 2, n = 8 for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4366
// Sweeps k = 17..31 and, for each k, every n in 1..8 and m in 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386
// Runs m = 2, n = 8 for k = 32, 48, ..., 160 (steps of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4401
// Sweeps k = 32, 48, ..., 160 and, for each k, every n in 1..8 and m in 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421
// Runs m = 2 with n = 9..15 (above nr = 8) and k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4438
// Runs m = 2 with n = 9..15 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4456
// Sweeps n = 9..15, k = 1, 18, 35, 52, 69 and m = 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476
// Runs m = 2 with n = 16 and 24 (multiples of nr = 8) and k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4493
// Runs m = 2 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4511
// Sweeps n = 16 and 24, k = 1, 18, 35, 52, 69 and m = 1..2,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531
// Runs m = 2, n = 8 with ks set to 3 for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4547
// Sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2 with ks set to 3,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568
// Runs m = 2 with n = 9..15 and k = 1, 18, 35, 52, 69, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4586
// Runs m = 2 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4604
// Sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2 with cm_stride set to 11,
// running a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625
// Runs m = 2, n = 8 with ks set to 3 and a_offset set to 163
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4642
// Runs m = 2, n = 8 with ks set to 3 and a_offset set to 163 for
// k = 1, 18, 35, 52, 69, once per zero_index value mz = 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4662
// Runs the full tile (m = 2, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4676
// Runs the full tile (m = 2, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4690
// Runs the full tile (m = 2, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4704 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4705
4706
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full tile once: m = 1, n = 8, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4720
// Runs the full tile (m = 1, n = 8, k = 8) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4734
// With k fixed at 8, runs every n in 1..8; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4752
// With n = 8 and k = 8 fixed, loops over m; the loop covers only m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4768
// With m = 1 and k = 8 fixed, runs n = 1..8, a single iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4784
// Runs m = 1, n = 8 for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4799
// Sweeps k = 1..7 and n = 1..8; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819
// Runs m = 1, n = 8 for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4834
// Sweeps k = 9..15 and n = 1..8; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854
// Runs m = 1, n = 8 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4869
// Sweeps k = 16, 24, ..., 80 and n = 1..8; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889
// Runs m = 1 with n = 9..15 (above nr = 8) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4906
// Runs m = 1 with n = 9..15 and k = 1, 10, 19, 28, 37, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4924
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944
// Runs m = 1 with n = 16 and 24 (multiples of nr = 8) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4961
// Runs m = 1 with n = 16 and 24 and k = 1, 10, 19, 28, 37, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4979
// Sweeps n = 16 and 24 and k = 1, 10, 19, 28, 37; the m loop covers only m = 1.
// A single iteration is run per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999
// Full 1x8 tile with ks = 3, swept over k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5015
// ks = 3 with every n in 1..8 and m = 1, swept over k = 1, 10, 19, 28, 37;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036
// ks = 3 with n = 9..15 (above nr = 8) against k = 1, 10, 19, 28, 37 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5054
// ks = 3 with n = 16 and 24 against k = 1, 10, 19, 28, 37 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5072
// cm_stride set to 11 with every n in 1..8 and m = 1, swept over k = 1, 10, 19, 28, 37;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093
// Full 1x8 tile with ks = 3 and a_offset = 43, swept over k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
5110
// Full 1x8 tile with ks = 3 and a_offset = 43, swept over k = 1, 10, 19, 28, 37,
// passing each zero_index in [0, 1) to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
5130
// Single 1x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5144
// Single 1x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5158
// Single 1x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
5172 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173
5174
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 tile (kr = 2) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5188
// Single 1x8 tile (kr = 2) at k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5202
// k = 16 with every n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5220
// k = 16 and n = 8 while m loops over 1..1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5236
// k = 16 and m = 1 while n loops over 1..8; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5252
// Full 1x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5267
// Every k in 1..15 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// Full 1x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5302
// Every k in 17..31 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5337
// k = 32, 48, ..., 160 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357
// Sweeps n = 9..15 (above nr = 8) against k = 1, 18, 35, 52, 69 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5374
// Sweeps n = 9..15 against k = 1, 18, 35, 52, 69 with m = 1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5392
// Sweeps n = 9..15 against k = 1, 18, 35, 52, 69 and m = 1,
// running a single iteration for each shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412
// Sweeps n = 16 and 24 (multiples of nr = 8) against k = 1, 18, 35, 52, 69 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5429
// Sweeps n = 16 and 24 against k = 1, 18, 35, 52, 69 with m = 1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5447
// Sweeps n = 16 and 24 against k = 1, 18, 35, 52, 69 and m = 1,
// running a single iteration for each shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467
// Full 1x8 tile with ks = 3, swept over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5483
// ks = 3 with every n in 1..8 and m = 1, swept over k = 1, 18, 35, 52, 69;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504
// ks = 3 with n = 9..15 (above nr = 8) against k = 1, 18, 35, 52, 69 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5522
// ks = 3 with n = 16 and 24 against k = 1, 18, 35, 52, 69 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5540
// cm_stride set to 11 with every n in 1..8 and m = 1, swept over k = 1, 18, 35, 52, 69;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561
// Full 1x8 tile with ks = 3 and a_offset = 83, swept over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5578
// Full 1x8 tile with ks = 3 and a_offset = 83, swept over k = 1, 18, 35, 52, 69,
// passing each zero_index in [0, 1) to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5598
// Single 1x8 tile (kr = 2) at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5612
// Single 1x8 tile (kr = 2) at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5626
// Single 1x8 tile (kr = 2) at k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5640 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641
5642
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 tile (kr = 2) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5656
// Single 1x8 tile (kr = 2) at k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
5670
// k = 16 with every n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5688
// k = 16 and n = 8 while m loops over 1..1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5704
// k = 16 and m = 1 while n loops over 1..8; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5720
// Full 1x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5735
// Every k in 1..15 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755
// Full 1x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5770
// Every k in 17..31 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5805
// k = 32, 48, ..., 160 crossed with n in 1..8 and m = 1; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825
// Sweeps n = 9..15 (above nr = 8) against k = 1, 18, 35, 52, 69 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5842
// Sweeps n = 9..15 against k = 1, 18, 35, 52, 69 with m = 1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5860
// Sweeps n = 9..15 against k = 1, 18, 35, 52, 69 and m = 1,
// running a single iteration for each shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880
// Sweeps n = 16 and 24 (multiples of nr = 8) against k = 1, 18, 35, 52, 69 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5897
// Sweeps n = 16 and 24 against k = 1, 18, 35, 52, 69 with m = 1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5915
// Sweeps n = 16 and 24 against k = 1, 18, 35, 52, 69 and m = 1,
// running a single iteration for each shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935
// Full 1x8 tile with ks = 3, swept over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
5951
// ks = 3 with every n in 1..8 and m = 1, swept over k = 1, 18, 35, 52, 69;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972
// ks = 3 with n = 9..15 (above nr = 8) against k = 1, 18, 35, 52, 69 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
5990
// ks = 3 with n = 16 and 24 against k = 1, 18, 35, 52, 69 and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
6008
// cm_stride set to 11 with every n in 1..8 and m = 1, swept over k = 1, 18, 35, 52, 69;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029
// Full 1x8 tile with ks = 3 and a_offset = 83, swept over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6046
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..0 with
// m = 1, n = 8, ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6066
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6080
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6094
// Runs the microkernel once with m = 1, n = 8, k = 16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6108 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109
6110
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6124
// Runs the microkernel once with m = 1, n = 8, k = 16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6138
// Sweeps n over 1..8 and m over 1..1 at k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6156
// Sweeps m over 1..1 with n = 8 and k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6172
// Sweeps n over 1..8 with m = 1 and k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6188
// Sweeps k over 1..15 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6203
// Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223
// Sweeps k over 17..31 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6238
// Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6273
// Sweeps k over 32, 48, ..., 160 (step 16), n over 1..8 and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6310
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6328
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6365
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6383
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6419
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with ks(3),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6458
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6476
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with
// cm_stride(11), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8, ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6514
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..0 with
// m = 1, n = 8, ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6534
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6548
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6562
// Runs the microkernel once with m = 1, n = 8, k = 16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577
6578
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
6592
// Runs the microkernel once with m = 1, n = 8, k = 16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
6606
// Sweeps n over 1..8 and m over 1..1 at k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6624
// Sweeps m over 1..1 with n = 8 and k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6640
// Sweeps n over 1..8 with m = 1 and k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6656
// Sweeps k over 1..15 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6671
// Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691
// Sweeps k over 17..31 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6706
// Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6741
// Sweeps k over 32, 48, ..., 160 (step 16), n over 1..8 and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6778
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6796
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6833
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6851
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6887
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with ks(3),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6926
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6944
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with
// cm_stride(11), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8, ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6982
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..0 with
// m = 1, n = 8, ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7002
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7016
// Runs the microkernel once with m = 1, n = 8, k = 16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7030
// Runs the microkernel once with m = 1, n = 8, k = 16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7044 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045
7046
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the microkernel once with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7060
// Runs the microkernel once with m = 1, n = 8, k = 16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7074
// Sweeps n over 1..8 and m over 1..1 at k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7092
// Sweeps m over 1..1 with n = 8 and k = 16, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7108
// Sweeps n over 1..8 with m = 1, k = 16 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7124
// Sweeps k over 1..15 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7139
// For each k in 1..15 and each n in 1..8, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159
// Sweeps k over 17..31 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7174
// For each k in 17..31 and each n in 1..8, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194
// Sweeps k from 32 to 160 in steps of 16 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7209
// For each k from 32 to 160 in steps of 16 and each n in 1..8, runs the
// inner m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229
// For each n in 9..15, sweeps k from 1 to 80 in steps of 17 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7246
// For each n in 9..15, sweeps k from 1 to 80 in steps of 17 with m = 1 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7264
// For each n in 9..15 and each k from 1 to 80 in steps of 17, runs the inner
// m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284
// For n = 16 and n = 24, sweeps k from 1 to 80 in steps of 17 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7301
// For n = 16 and n = 24, sweeps k from 1 to 80 in steps of 17 with m = 1 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7319
// For n = 16 and n = 24 and each k from 1 to 80 in steps of 17, runs the
// inner m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339
// Sweeps k from 1 to 80 in steps of 17 with m = 1, n = 8 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7355
// For each k from 1 to 80 in steps of 17 and each n in 1..8, runs the inner
// m loop (only m = 1) with ks set to 3 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376
// For each n in 9..15, sweeps k from 1 to 80 in steps of 17 with m = 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7394
// For n = 16 and n = 24, sweeps k from 1 to 80 in steps of 17 with m = 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7412
// For each k from 1 to 80 in steps of 17 and each n in 1..8, runs the inner
// m loop (only m = 1) with cm_stride set to 11 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433
// Sweeps k from 1 to 80 in steps of 17 with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7450
// For each k from 1 to 80 in steps of 17, loops zero_index over 0 only, with
// m = 1, n = 8, ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t mz_index = 0; mz_index < 1; ++mz_index) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(mz_index)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7470
// Single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7484
// Single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7498
// Single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513
7514
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 16 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7528
// Single run with m = 1, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7542
// For each n in 1..16, runs the inner m loop (which covers only m = 1) with
// k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7560
// Loops over m (only m = 1) with n = 16, k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7576
// Sweeps n over 1..16 with m = 1, k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7592
// Sweeps k over 1..7 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7607
// For each k in 1..7 and each n in 1..16, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627
// Sweeps k over 9..15 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7642
// For each k in 9..15 and each n in 1..16, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662
// Sweeps k from 16 to 80 in steps of 8 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7677
// For each k from 16 to 80 in steps of 8 and each n in 1..16, runs the inner
// m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697
// For each n in 17..31, sweeps k from 1 to 40 in steps of 9 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7714
// For each n in 17..31, sweeps k from 1 to 40 in steps of 9 with m = 1 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7732
// For each n in 17..31 and each k from 1 to 40 in steps of 9, runs the inner
// m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752
// For n = 32 and n = 48, sweeps k from 1 to 40 in steps of 9 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7769
// For n = 32 and n = 48, sweeps k from 1 to 40 in steps of 9 with m = 1 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7787
// For n = 32 and n = 48 and each k from 1 to 40 in steps of 9, runs the
// inner m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807
// Sweeps k from 1 to 40 in steps of 9 with m = 1, n = 16 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7823
// For each k from 1 to 40 in steps of 9 and each n in 1..16, runs the inner
// m loop (only m = 1) with ks set to 3 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844
// For each n in 17..31, sweeps k from 1 to 40 in steps of 9 with m = 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7862
// For n = 32 and n = 48, sweeps k from 1 to 40 in steps of 9 with m = 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7880
// For each k from 1 to 40 in steps of 9 and each n in 1..16, runs the inner
// m loop (only m = 1) with cm_stride set to 19 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901
// Sweeps k from 1 to 40 in steps of 9 with m = 1, n = 16, ks set to 3 and
// a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7918
// For each k from 1 to 40 in steps of 9, loops zero_index over 0 only, with
// m = 1, n = 16, ks set to 3 and a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index < 1; ++mz_index) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(mz_index)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7938
// Single run with m = 1, n = 16, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7952
// Single run with m = 1, n = 16, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7966
// Single run with m = 1, n = 16, k = 8 and cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7980 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981
7982
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 16 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7996
// Single run with m = 1, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
8010
// For each n in 1..16, runs the inner m loop (which covers only m = 1) with
// k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8028
// Loops over m (only m = 1) with n = 16, k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8044
// Sweeps n over 1..16 with m = 1, k = 8 and iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8060
// Sweeps k over 1..7 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8075
// For each k in 1..7 and each n in 1..16, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095
// Sweeps k over 9..15 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8110
// For each k in 9..15 and each n in 1..16, runs the inner m loop (only
// m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130
// Sweeps k from 16 to 80 in steps of 8 with m = 1 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8145
// For each k from 16 to 80 in steps of 8 and each n in 1..16, runs the inner
// m loop (only m = 1) with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165
// Sweeps n over 17..31 against k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8182
// Sweeps n over 17..31 against k = 1, 10, 19, 28, 37 with m = 1 and
// cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8200
// Sweeps n over 17..31, k = 1, 10, 19, 28, 37 and m over 1..1, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220
// Runs n = 32 and n = 48 against k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8237
// Runs n = 32 and n = 48 against k = 1, 10, 19, 28, 37 with m = 1 and
// cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8255
// Runs n = 32 and n = 48 against k = 1, 10, 19, 28, 37 and m over 1..1, with
// iterations(1) for every combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 16 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8291
// Sweeps k = 1, 10, 19, 28, 37, n over 1..16 and m over 1..1 with ks(3) and
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312
// Sweeps n over 17..31 against k = 1, 10, 19, 28, 37 with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8330
// Runs n = 32 and n = 48 against k = 1, 10, 19, 28, 37 with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8348
// Sweeps k = 1, 10, 19, 28, 37, n over 1..16 and m over 1..1 with
// cm_stride(19) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 16 with ks(3) and a_offset(43).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8386
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 16 with ks(3) and a_offset(43),
// passing each index in 0..0 to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8406
// Single run at m = 1, n = 16, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8420
// Single run at m = 1, n = 16, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8434
// Single run at m = 1, n = 16, k = 8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449
8450
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at the shape m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8464
// Single run at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8478
// Sweeps n over 1..8 and m over 1..2 at k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8496
// Sweeps m over 1..2 at n = 8, k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8512
// Sweeps n over 1..8 at m = 2, k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8528
// Sweeps k over 1..15 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8543
// Sweeps k over 1..15, n over 1..8 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563
// Sweeps k over 17..31 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8578
// Sweeps k over 17..31, n over 1..8 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598
// Sweeps k over 32, 48, ..., 160 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8613
// Sweeps k over 32, 48, ..., 160 with n over 1..8 and m over 1..2, running
// every combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633
// Sweeps n over 9..15 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8650
// Sweeps n over 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8668
// Sweeps n over 9..15, k = 1, 18, 35, 52, 69 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8705
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8723
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 and m over 1..2, with
// iterations(1) for every combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743
// Sweeps k = 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8759
// Sweeps k = 1, 18, 35, 52, 69, n over 1..8 and m over 1..2 with ks(3) and
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780
// Sweeps n over 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8798
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8816
// Sweeps k = 1, 18, 35, 52, 69, n over 1..8 and m over 1..2 with
// cm_stride(11) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837
// Sweeps k = 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8854
// Sweeps k = 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3) and a_offset(163),
// passing each index in 0..1 to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8874
// Single run at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8888
// Single run at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8902
// Single run at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8916 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917
8918
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at the shape m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8932
// Single run at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8946
// Sweeps n over 1..8 and m over 1..2 at k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8964
// Sweeps m over 1..2 at n = 8, k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8980
// Sweeps n over 1..8 at m = 2, k = 16, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8996
// Sweeps k over 1..15 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9011
// Sweeps k over 1..15, n over 1..8 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031
// Sweeps k over 17..31 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9046
// Sweeps k over 17..31, n over 1..8 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066
// Sweeps k over 32, 48, ..., 160 at the fixed shape m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9081
// Sweeps k over 32, 48, ..., 160 with n over 1..8 and m over 1..2, running
// every combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101
// Sweeps n over 9..15 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9118
// Sweeps n over 9..15 against k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9136
// Sweeps n over 9..15, k = 1, 18, 35, 52, 69 and m over 1..2, running every
// combination with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9173
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9191
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 and m over 1..2, with
// iterations(1) for every combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211
// Sweeps k = 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9227
// ks = 3 with every (m, n) in [1, 2] x [1, 8], sampling k = 1, 18, 35, 52, 69;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248
// ks = 3 with n = 9..15 (above nr = 8, below 16), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9266
// ks = 3 with n = 16, 24 (multiples of nr = 8), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9284
// cm_stride = 11 with every (m, n) in [1, 2] x [1, 8],
// sampling k = 1, 18, 35, 52, 69; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9305
// Full 2x8 tile with ks = 3 and a_offset = 163, sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9322
// Full 2x8 tile with ks = 3 and a_offset = 163; for each sampled k
// (1, 18, 35, 52, 69) zero_index takes the values 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9342
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9356
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9370
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9384 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385
9386
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 2x8c2 NEONv8 MLAL DUP kernel: m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9400
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9414
// k = 16 with every (n, m) in [1, 8] x [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9432
// k = 16, n = 8, with m stepping through 1 and 2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9448
// k = 16, m = 2, with n stepping through 1..8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9464
// Full 2x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9479
// Every k in 1..15 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9499
// Full 2x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9514
// Every k in 17..31 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9534
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9549
// k = 32, 48, ..., 160 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9569
// n = 9..15 (above nr = 8, below 16) with m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9586
// Same sweep as n_gt_8 (n = 9..15; k = 1, 18, 35, 52, 69; m = 2), but with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9604
// For n = 9..15 and k = 1, 18, 35, 52, 69, tries every m in [1, 2] with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624
// n = 16, 24 (multiples of nr = 8) with m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9641
// Same sweep as n_div_8 (n = 16, 24; k = 1, 18, 35, 52, 69; m = 2), but with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9659
// For n = 16, 24 and k = 1, 18, 35, 52, 69, tries every m in [1, 2] with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679
// Full 2x8 tile with ks = 3, sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9695
// ks = 3 with every (m, n) in [1, 2] x [1, 8], sampling k = 1, 18, 35, 52, 69;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716
// ks = 3 with n = 9..15 (above nr = 8, below 16), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9734
// ks = 3 with n = 16, 24 (multiples of nr = 8), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9752
// cm_stride = 11 with every (m, n) in [1, 2] x [1, 8],
// sampling k = 1, 18, 35, 52, 69; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773
// Full 2x8 tile with ks = 3 and a_offset = 163, sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9790
// Full 2x8 tile with ks = 3 and a_offset = 163; for each sampled k
// (1, 18, 35, 52, 69) zero_index takes the values 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9810
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9824
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9838
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853
9854
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 2x8c4 NEON MLAL DUP kernel: m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9868
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9882
// k = 16 with every (n, m) in [1, 8] x [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9900
// k = 16, n = 8, with m stepping through 1 and 2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9916
// k = 16, m = 2, with n stepping through 1..8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9932
// Full 2x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9947
// Every k in 1..15 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967
// Full 2x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9982
// Every k in 17..31 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10017
// k = 32, 48, ..., 160 combined with every (n, m) in [1, 8] x [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037
// n = 9..15 (above nr = 8, below 16) with m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10054
// Same sweep as n_gt_8 (n = 9..15; k = 1, 18, 35, 52, 69; m = 2), but with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10072
// For n = 9..15 and k = 1, 18, 35, 52, 69, tries every m in [1, 2] with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092
// n = 16, 24 (multiples of nr = 8) with m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10109
// Same sweep as n_div_8 (n = 16, 24; k = 1, 18, 35, 52, 69; m = 2), but with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10127
// For n = 16, 24 and k = 1, 18, 35, 52, 69, tries every m in [1, 2] with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147
// Full 2x8 tile with ks = 3, sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10163
// ks = 3 with every (m, n) in [1, 2] x [1, 8], sampling k = 1, 18, 35, 52, 69;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184
// ks = 3 with n = 9..15 (above nr = 8, below 16), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10202
// ks = 3 with n = 16, 24 (multiples of nr = 8), m = 2,
// sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10220
// cm_stride = 11 with every (m, n) in [1, 2] x [1, 8],
// sampling k = 1, 18, 35, 52, 69; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241
// Full 2x8 tile with ks = 3 and a_offset = 163, sampling k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10258
// Full 2x8 tile with ks = 3 and a_offset = 163; for each sampled k
// (1, 18, 35, 52, 69) zero_index takes the values 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10278
// 2x8c4 NEON MLAL DUP kernel on a 2x8 tile at k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10292
// 2x8c4 NEON MLAL DUP kernel on a 2x8 tile at k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10306
// 2x8c4 NEON MLAL DUP kernel on a 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321
10322
10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c8 NEON MLAL kernel on a 2x8 tile (m == mr, n == nr) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10336
// 2x8c8 NEON MLAL kernel on a 2x8 tile at k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10350
// 2x8c8 NEON MLAL kernel at k = 16: every n in [1, 8] combined with every
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10368
// 2x8c8 NEON MLAL kernel at k = 16 and n = 8, varying only m over [1, 2].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10384
// 2x8c8 NEON MLAL kernel at k = 16 and m = 2, varying only n over [1, 8].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10400
// 2x8c8 NEON MLAL kernel on a 2x8 tile for every k in [1, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10415
// 2x8c8 NEON MLAL kernel: every k in [1, 16) crossed with n in [1, 8] and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435
// 2x8c8 NEON MLAL kernel on a 2x8 tile for every k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10450
// 2x8c8 NEON MLAL kernel: every k in [17, 32) crossed with n in [1, 8] and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// 2x8c8 NEON MLAL kernel on a 2x8 tile for k = 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10485
// 2x8c8 NEON MLAL kernel: k = 32, 48, ..., 160 crossed with n in [1, 8] and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505
// 2x8c8 NEON MLAL kernel with m = 2: every n in [9, 16) for k stepping
// from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10522
// 2x8c8 NEON MLAL kernel with m = 2 and cn_stride(11): every n in [9, 16)
// for k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10540
// 2x8c8 NEON MLAL kernel: every n in [9, 16), k = 1, 18, ..., 69 and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560
// 2x8c8 NEON MLAL kernel with m = 2: n takes the values 16 and 24 for
// k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10577
// 2x8c8 NEON MLAL kernel with m = 2 and cn_stride(11): n takes the values
// 16 and 24 for k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10595
// 2x8c8 NEON MLAL kernel: n in {16, 24}, k = 1, 18, ..., 69 and m in [1, 2],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615
// 2x8c8 NEON MLAL kernel on a 2x8 tile with ks(3), for k stepping from 1
// to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10631
// 2x8c8 NEON MLAL kernel with ks(3): k = 1, 18, ..., 69 crossed with
// n in [1, 8] and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652
// 2x8c8 NEON MLAL kernel with m = 2 and ks(3): every n in [9, 16) for
// k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10670
// 2x8c8 NEON MLAL kernel with m = 2 and ks(3): n takes the values 16 and 24
// for k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10688
// 2x8c8 NEON MLAL kernel with cm_stride(11): k = 1, 18, ..., 69 crossed with
// n in [1, 8] and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709
// 2x8c8 NEON MLAL kernel on a 2x8 tile with ks(3) and a_offset(163), for
// k stepping from 1 to 80 in increments of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10726
// 2x8c8 NEON MLAL kernel with ks(3) and a_offset(163); zero_index is set to
// 0 and then 1 for each k in 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(k);
      tester.ks(3).a_offset(163).zero_index(mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10746
// 2x8c8 NEON MLAL kernel on a 2x8 tile at k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10760
// 2x8c8 NEON MLAL kernel on a 2x8 tile at k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10774
// 2x8c8 NEON MLAL kernel on a 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10788 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10789
10790
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x16 NEON MLAL LANE kernel on a 2x16 tile (m == mr, n == nr) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10804
// 2x16 NEON MLAL LANE kernel on a 2x16 tile at k = 8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10818
// 2x16 NEON MLAL LANE kernel at k = 8: every n in [1, 16] combined with
// every m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10836
// 2x16 NEON MLAL LANE kernel at k = 8 and n = 16, varying only m over [1, 2].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(m).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10852
// 2x16 NEON MLAL LANE kernel at k = 8 and m = 2, varying only n over [1, 16].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10868
// 2x16 NEON MLAL LANE kernel on a 2x16 tile for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10883
// 2x16 NEON MLAL LANE kernel: every k in [1, 8) crossed with n in [1, 16]
// and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903
// 2x16 NEON MLAL LANE kernel on a 2x16 tile for every k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10918
// 2x16 NEON MLAL LANE kernel: every k in [9, 16) crossed with n in [1, 16]
// and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938
// 2x16 NEON MLAL LANE kernel on a 2x16 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10953
// 2x16 NEON MLAL LANE kernel: k = 16, 24, ..., 80 crossed with n in [1, 16]
// and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973
// 2x16 NEON MLAL LANE kernel with m = 2: every n in [17, 32) for k stepping
// from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10990
// 2x16 NEON MLAL LANE kernel with m = 2 and cn_stride(19): every n in
// [17, 32) for k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11008
// 2x16 NEON MLAL LANE kernel: every n in [17, 32), k = 1, 10, ..., 37 and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028
// 2x16 NEON MLAL LANE kernel with m = 2: n takes the values 32 and 48 for
// k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11045
// 2x16 NEON MLAL LANE kernel with m = 2 and cn_stride(19): n takes the
// values 32 and 48 for k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11063
// 2x16 NEON MLAL LANE kernel: n in {32, 48}, k = 1, 10, ..., 37 and
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083
// 2x16 NEON MLAL LANE kernel on a 2x16 tile with ks(3), for k stepping from
// 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11099
// 2x16 NEON MLAL LANE kernel with ks(3): k = 1, 10, ..., 37 crossed with
// n in [1, 16] and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120
// 2x16 NEON MLAL LANE kernel with m = 2 and ks(3): every n in [17, 32) for
// k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11138
// 2x16 NEON MLAL LANE kernel with m = 2 and ks(3): n takes the values 32
// and 48 for k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11156
// 2x16 NEON MLAL LANE kernel with cm_stride(19): k = 1, 10, ..., 37 crossed
// with n in [1, 16] and m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177
// 2x16 NEON MLAL LANE kernel on a 2x16 tile with ks(3) and a_offset(83),
// for k stepping from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11194
// 2x16 NEON MLAL LANE kernel with ks(3) and a_offset(83); zero_index is set
// to 0 and then 1 for each k in 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(16).k(k);
      tester.ks(3).a_offset(83).zero_index(mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11214
// 2x16 NEON MLAL LANE kernel on a 2x16 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11228
// 2x16 NEON MLAL LANE kernel on a 2x16 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11242
// 2x16 NEON MLAL LANE kernel on a 2x16 tile at k = 8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11256 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257
11258
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 3x8 NEON MLAL LANE PRFM kernel on a 3x8 tile (m == mr, n == nr) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11272
// 3x8 NEON MLAL LANE PRFM kernel on a 3x8 tile at k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11286
// 3x8 NEON MLAL LANE PRFM kernel at k = 8: every n in [1, 8] combined with
// every m in [1, 3], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304
// 3x8 NEON MLAL LANE PRFM kernel at k = 8 and n = 8, varying only m over
// [1, 3].
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11320
// Sweeps n = 1..8 with m=3 and k=8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11336
// Sweeps k = 1..7 with m=3 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11351
// Sweeps k = 1..7, n = 1..8 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371
// Sweeps k = 9..15 with m=3 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11386
// Sweeps k = 9..15, n = 1..8 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406
// Sweeps k = 16, 24, ..., 80 (step 8) with m=3 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11421
// Sweeps k = 16, 24, ..., 80, n = 1..8 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m=3 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476
// Sweeps n = 9..15, k = 1, 10, 19, 28, 37 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m=3 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531
// Sweeps n = 16, 24, k = 1, 10, 19, 28, 37 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551
// Sweeps k = 1, 10, 19, 28, 37 with m=3, n=8 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11567
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1..3 with ks=3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m=3 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m=3 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1..3 with cm_stride set to 11, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645
// Sweeps k = 1, 10, 19, 28, 37 with m=3, n=8, ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11662
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..2 with m=3, n=8, ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11682
// Single run with m=3, n=8, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11696
// Single run with m=3, n=8, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11710
// Single run with m=3, n=8, k=8 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8).cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11724 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725
11726
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=3, n=16 and k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11740
// Single run with m=3, n=16, k=8 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11754
// Sweeps n = 1..16 and m = 1..3 at k=8, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11772
// Sweeps m = 1..3 with n=16 and k=8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(rows).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11788
// Sweeps n = 1..16 with m=3 and k=8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11804
// Sweeps k = 1..7 with m=3 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11819
// Sweeps k = 1..7, n = 1..16 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839
// Sweeps k = 9..15 with m=3 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11854
// Sweeps k = 9..15, n = 1..16 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874
// Sweeps k = 16, 24, ..., 80 (step 8) with m=3 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11889
// Sweeps k = 16, 24, ..., 80, n = 1..16 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909
// Sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11926
// Sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with m=3 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11944
// Sweeps n = 17..31, k = 1, 10, 19, 28, 37 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964
// Sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11981
// Sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with m=3 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11999
// Sweeps n = 32, 48, k = 1, 10, 19, 28, 37 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019
// Sweeps k = 1, 10, 19, 28, 37 with m=3, n=16 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12035
// Sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..3 with ks=3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056
// Sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with m=3 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12074
// Sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with m=3 and ks=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12092
// Sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..3 with cm_stride set to 19, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113
// Sweeps k = 1, 10, 19, 28, 37 with m=3, n=16, ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12130
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..2 with m=3, n=16, ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12150
// Single run with m=3, n=16, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12164
// Single run with m=3, n=16, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12178
// Single run with m=3, n=16, k=8 and cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12192 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193
12194
12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=3, n=16 and k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12208
// Single run with m=3, n=16, k=8 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12222
// Sweeps n = 1..16 and m = 1..3 at k=8, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12240
// Sweeps m = 1..3 with n=16 and k=8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(rows).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12256
// Sweeps n = 1..16 with m=3 and k=8, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12272
// Sweeps k = 1..7 with m=3 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12287
// Sweeps k = 1..7, n = 1..16 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307
// Sweeps k = 9..15 with m=3 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12322
// Sweeps k = 9..15, n = 1..16 and m = 1..3, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342
// Sweeps k over the multiples of 8 from 16 to 80 with m=3, n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12357
// Sweeps k over the multiples of 8 from 16 to 80 and every (m, n) with
// m in 1..3 and n in 1..16, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12394
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12412
// Sweeps n over 17..31, k over 1, 10, 19, 28, 37 and m over 1..3,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12449
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12467
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 and m over 1..3,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=16 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12503
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3 and
// n in 1..16, with ks set to 3 and a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12542
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12560
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3 and
// n in 1..16, with cm_stride set to 19 and a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=16, ks set to 3 and
// a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
12598
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..2 with m=3, n=16,
// ks set to 3 and a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(kc);
      tester.ks(3).a_offset(127).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12618
// Single run with m=3, n=16, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12632
// Single run with m=3, n=16, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12646
// Single run with m=3, n=16, k=8 and cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
12660 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12661
12662
12663 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=3, n=16, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
12676
// Single run with m=3, n=16, k=8 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
12690
// Runs k=8 for every (m, n) with m in 1..3 and n in 1..16, using a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12708
// Runs k=8, n=16 for m in 1..3, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(mc).n(16).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12724
// Runs k=8, m=3 for n in 1..16, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12740
// Sweeps k over 1..7 with m=3, n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12755
// Sweeps k over 1..7 and every (m, n) with m in 1..3 and n in 1..16,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775
// Sweeps k over 9..15 with m=3, n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12790
// Sweeps k over 9..15 and every (m, n) with m in 1..3 and n in 1..16,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810
// Sweeps k over the multiples of 8 from 16 to 80 with m=3, n=16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12825
// Sweeps k over the multiples of 8 from 16 to 80 and every (m, n) with
// m in 1..3 and n in 1..16, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12862
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12880
// Sweeps n over 17..31, k over 1, 10, 19, 28, 37 and m over 1..3,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12917
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3 and
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
12935
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 and m over 1..3,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=16 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
12971
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3 and
// n in 1..16, with ks set to 3 and a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m=3 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13010
// Runs n = 32 and n = 48 against k over 1, 10, 19, 28, 37 with m=3 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13028
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3 and
// n in 1..16, with cm_stride set to 19 and a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=16, ks set to 3 and
// a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
13066
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..2 with m=3, n=16,
// ks set to 3 and a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(kc);
      tester.ks(3).a_offset(127).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13086
// Single run with m=3, n=16, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
13100
// Single run with m=3, n=16, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
13114
// Single run with m=3, n=16, k=8 and cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
13128 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13129
13130
13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=4, n=8, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
13144
// Single run with m=4, n=8, k=8 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
13158
// Runs k=8 for every (m, n) with m in 1..4 and n in 1..8, using a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13176
// Runs k=8, n=8 for m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13192
// Runs k=8, m=4 for n in 1..8, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13208
// Sweeps k over 1..7 with m=4, n=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13223
// Sweeps k over 1..7 and every (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243
// Sweeps k over 9..15 with m=4, n=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13258
// Sweeps k over 9..15 and every (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278
// Sweeps k over the multiples of 8 from 16 to 80 with m=4, n=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
13293
// Sweeps k over the multiples of 8 from 16 to 80 and every (m, n) with
// m in 1..4 and n in 1..8, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13330
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m=4 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
13348
// n_gt_8_subtile: n in 9..15, k in 1, 10, ..., 37, and every m in [1, 4],
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368
// n_div_8: n in {16, 24} crossed with k in 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13385
// n_div_8_strided_cn: n in {16, 24}, k in 1, 10, ..., 37, m = 4, with
// cn_stride(11) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13403
// n_div_8_subtile: n in {16, 24}, k in 1, 10, ..., 37, and every m in [1, 4],
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423
// small_kernel: full 4x8 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13439
// small_kernel_subtile: ks(3) with k in 1, 10, ..., 37 and every sub-tile
// n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460
// n_gt_8_small_kernel: n in 9..15, k in 1, 10, ..., 37, m = 4, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13478
// n_div_8_small_kernel: n in {16, 24}, k in 1, 10, ..., 37, m = 4, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13496
// strided_cm_subtile: cm_stride(11) with k in 1, 10, ..., 37 and every
// sub-tile n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517
// a_offset: full 4x8 tile with ks(3) and a_offset(163), sweeping k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13534
// zero: full 4x8 tile with ks(3) and a_offset(163); for each k in
// 1, 10, ..., 37 the zero_index is swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13554
// qmin: single run of the full 4x8 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
13568
// qmax: single run of the full 4x8 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
13582
// strided_cm: single run of the full 4x8 tile at k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
13596 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13597
13598
13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single run of the full 4x8 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
13612
// strided_cn: single run of the full 4x8 tile at k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
13626
// k_eq_8_subtile: k = 8 with every sub-tile n in [1, 8], m in [1, 4];
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13644
// k_eq_8_subtile_m: k = 8, n = 8, sweeping m over [1, 4]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13660
// k_eq_8_subtile_n: k = 8, m = 4, sweeping n over [1, 8]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13676
// k_lt_8: full 4x8 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13691
// k_lt_8_subtile: every k in [1, 7] crossed with every sub-tile n in [1, 8],
// m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711
// k_gt_8: full 4x8 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13726
// k_gt_8_subtile: every k in [9, 15] crossed with every sub-tile n in [1, 8],
// m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746
// k_div_8: full 4x8 tile for k in 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13761
// k_div_8_subtile: for each k in 16, 24, ..., 80, runs every sub-tile with
// n in [1, 8] and m in [1, 4], using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781
// n_gt_8: sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13798
// n_gt_8_strided_cn: n in 9..15, k in 1, 10, ..., 37, m = 4, with
// cn_stride(11) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13816
// n_gt_8_subtile: n in 9..15, k in 1, 10, ..., 37, and every m in [1, 4],
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836
// n_div_8: n in {16, 24} crossed with k in 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13853
// n_div_8_strided_cn: n in {16, 24}, k in 1, 10, ..., 37, m = 4, with
// cn_stride(11) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13871
// n_div_8_subtile: n in {16, 24}, k in 1, 10, ..., 37, and every m in [1, 4],
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891
// small_kernel: full 4x8 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13907
// small_kernel_subtile: ks(3) with k in 1, 10, ..., 37 and every sub-tile
// n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928
// n_gt_8_small_kernel: n in 9..15, k in 1, 10, ..., 37, m = 4, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13946
// n_div_8_small_kernel: n in {16, 24}, k in 1, 10, ..., 37, m = 4, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13964
// strided_cm_subtile: cm_stride(11) with k in 1, 10, ..., 37 and every
// sub-tile n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985
// a_offset: full 4x8 tile with ks(3) and a_offset(163), sweeping k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14002
// zero: full 4x8 tile with ks(3) and a_offset(163); for each k in
// 1, 10, ..., 37 the zero_index is swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14022
// qmin: single run of the full 4x8 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14036
// qmax: single run of the full 4x8 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14050
// strided_cm: single run of the full 4x8 tile at k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14064 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14065
14066
14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single run of the full 4x16 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14080
// strided_cn: single run of the full 4x16 tile at k = 8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14094
// k_eq_8_subtile: k = 8 with every sub-tile n in [1, 16], m in [1, 4];
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14112
// k_eq_8_subtile_m: k = 8, n = 16, sweeping m over [1, 4]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14128
// k_eq_8_subtile_n: k = 8, m = 4, sweeping n over [1, 16]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14144
// k_lt_8: full 4x16 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14159
// k_lt_8_subtile: every k in [1, 7] crossed with every sub-tile n in [1, 16],
// m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179
// k_gt_8: full 4x16 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14194
// k_gt_8_subtile: every k in [9, 15] crossed with every sub-tile n in [1, 16],
// m in [1, 4]; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214
// k_div_8: full 4x16 tile for k in 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14229
// k_div_8_subtile: for each k in 16, 24, ..., 80, runs every sub-tile with
// n in [1, 16] and m in [1, 4], using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249
// n_gt_16: sweeps n over 17..31 and k over 1, 10, 19, 28, 37 with m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14266
// n_gt_16_strided_cn: n in 17..31, k in 1, 10, ..., 37, m = 4, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14284
// n_gt_16_subtile: n in 17..31, k in 1, 10, ..., 37, and every m in [1, 4],
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304
// n_div_16: n in {32, 48} crossed with k in 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14321
// n_div_16_strided_cn: n in {32, 48}, k in 1, 10, ..., 37, m = 4, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14339
// n_div_16_subtile: n in {32, 48}, k in 1, 10, ..., 37, and every m in
// [1, 4], using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359
// small_kernel: full 4x16 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14375
// small_kernel_subtile: ks(3) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396
// n_gt_16_small_kernel: ks(3) with m = 4 and n = 17..31 (above nr = 16, below
// 2 * nr), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14414
// n_div_16_small_kernel: ks(3) with m = 4 and n = 32, 48 (multiples of
// nr = 16), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14432
// strided_cm_subtile: cm_stride(19) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453
// a_offset: full 4x16 tile with ks(3) and a_offset(163), at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14470
// zero: full 4x16 tile with ks(3) and a_offset(163); zero_index(mz) is swept
// over mz = 0..3 (one value per row of the mr = 4 tile), at
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14490
// qmin: single full 4x16 tile at k = 8 with qmin(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14504
// qmax: single full 4x16 tile at k = 8 with qmax(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14518
// strided_cm: single full 4x16 tile at k = 8 with cm_stride(19) set on the
// tester. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14532 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14533
14534
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single full 4x16 tile at k = 8 for the mlal_lane_prfm microkernel.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14548
// strided_cn: single full 4x16 tile at k = 8 with cn_stride(19) set on the
// tester. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14562
// k_eq_8_subtile: k = 8 with every (m, n) in 1..4 x 1..16. iterations(1) is
// set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14580
// k_eq_8_subtile_m: k = 8, n = 16 (full width), m swept over 1..4 with
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14596
// k_eq_8_subtile_n: k = 8, m = 4 (full height), n swept over 1..16 with
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14612
// k_lt_8: full 4x16 tile for every k in 1..7.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14627
// k_lt_8_subtile: k in 1..7 with every (m, n) in 1..4 x 1..16. iterations(1)
// is set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647
// k_gt_8: full 4x16 tile for every k in 9..15.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14662
// k_gt_8_subtile: k in 9..15 with every (m, n) in 1..4 x 1..16. iterations(1)
// is set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682
// k_div_8: full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14697
// k_div_8_subtile: k = 16, 24, ..., 80 with every (m, n) in 1..4 x 1..16.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717
// n_gt_16: m = 4 with n = 17..31 (above nr = 16, below 2 * nr), each at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14734
// n_gt_16_strided_cn: same sweep as n_gt_16 (n = 17..31; k = 1, 10, 19, 28,
// 37; m = 4) but with cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14752
// n_gt_16_subtile: n = 17..31; k = 1, 10, 19, 28, 37; every m in 1..4.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772
// n_div_16: m = 4 rows with n = 32 and 48 (multiples of nr = 16), each at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14789
// n_div_16_strided_cn: same sweep as n_div_16 (n = 32, 48; k = 1, 10, 19, 28,
// 37; m = 4) but with cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14807
// n_div_16_subtile: n = 32, 48; k = 1, 10, 19, 28, 37; every m in 1..4.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827
// small_kernel: full 4x16 tile with ks(3), at k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14843
// small_kernel_subtile: ks(3) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864
// n_gt_16_small_kernel: ks(3) with m = 4 and n = 17..31 (above nr = 16, below
// 2 * nr), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14882
// n_div_16_small_kernel: ks(3) with m = 4 and n = 32, 48 (multiples of
// nr = 16), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900
// strided_cm_subtile: cm_stride(19) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921
// a_offset: full 4x16 tile with ks(3) and a_offset(163), at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14938
// zero: full 4x16 tile with ks(3) and a_offset(163); zero_index(mz) is swept
// over mz = 0..3 (one value per row of the mr = 4 tile), at
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14958
// qmin: single full 4x16 tile at k = 8 with qmin(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14972
// qmax: single full 4x16 tile at k = 8 with qmax(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14986
// strided_cm: single full 4x16 tile at k = 8 with cm_stride(19) set on the
// tester. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15000 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001
15002
15003 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// k_eq_8: single full 4x16 tile at k = 8 for the 4x16c4 neondot microkernel
// (kr = 4). Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15016
// strided_cn: single full 4x16 tile at k = 8 with cn_stride(19) set on the
// tester. Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15030
// k_eq_8_subtile: k = 8 with every (m, n) in 1..4 x 1..16. iterations(1) is
// set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15048
// k_eq_8_subtile_m: k = 8, n = 16 (full width), m swept over 1..4 with
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15064
// k_eq_8_subtile_n: k = 8, m = 4 (full height), n swept over 1..16 with
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15080
// k_lt_8: full 4x16 tile for every k in 1..7.
// Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15095
// k_lt_8_subtile: k in 1..7 with every (m, n) in 1..4 x 1..16. iterations(1)
// is set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115
// k_gt_8: full 4x16 tile for every k in 9..15.
// Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15130
// k_gt_8_subtile: k in 9..15 with every (m, n) in 1..4 x 1..16. iterations(1)
// is set for each combination (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150
// k_div_8: full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15165
// k_div_8_subtile: k = 16, 24, ..., 80 with every (m, n) in 1..4 x 1..16.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185
// n_gt_16: m = 4 with n = 17..31 (above nr = 16, below 2 * nr), each at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15202
// n_gt_16_strided_cn: same sweep as n_gt_16 (n = 17..31; k = 1, 10, 19, 28,
// 37; m = 4) but with cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15220
// n_gt_16_subtile: n = 17..31; k = 1, 10, 19, 28, 37; every m in 1..4.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240
// n_div_16: m = 4 rows with n = 32 and 48 (multiples of nr = 16), each at
// k = 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15257
// n_div_16_strided_cn: same sweep as n_div_16 (n = 32, 48; k = 1, 10, 19, 28,
// 37; m = 4) but with cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15275
// n_div_16_subtile: n = 32, 48; k = 1, 10, 19, 28, 37; every m in 1..4.
// iterations(1) is set for each combination (presumably to bound the runtime
// of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295
// small_kernel: full 4x16 tile with ks(3), at k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON_DOT.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15311
// small_kernel_subtile: ks(3) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332
// n_gt_16_small_kernel: ks(3) with m = 4 and n = 17..31 (above nr = 16, below
// 2 * nr), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15350
// n_div_16_small_kernel: ks(3) with m = 4 and n = 32, 48 (multiples of
// nr = 16), at k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15368
// strided_cm_subtile: cm_stride(19) with every (m, n) in 1..4 x 1..16, at
// k = 1, 10, 19, 28, 37. iterations(1) is set for each combination
// (presumably to bound the runtime of the sweep).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389
// Sweeps k = 1, 10, ..., 37 with m = 4, n = 16, ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  const auto tile = GemmMicrokernelTester().mr(4).nr(16).kr(4).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester(tile).m(4).n(16).k(depth).ks(3).a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15406
// Sweeps k = 1, 10, ..., 37 and zero_index over [0, 3] with m = 4, n = 16,
// ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  const auto tile = GemmMicrokernelTester().mr(4).nr(16).kr(4).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester(tile).m(4).n(16).k(depth).ks(3).a_offset(163).zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15426
// Single Test() call with m = 4, n = 16, k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester().mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8).qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15440
// Single Test() call with m = 4, n = 16, k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester().mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8).qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15454
// Single Test() call with m = 4, n = 16, k = 8 and cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester().mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8).cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15468 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
15469
15470
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with m = 6, n = 8 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15484
// Single Test() call with m = 6, n = 8, k = 8 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15498
// Sweeps every n in [1, 8] and m in [1, 6] at k = 8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester(tile).m(rows).n(cols).k(8).iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15516
// Sweeps m over [1, 6] with n = 8, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester(tile).m(rows).n(8).k(8).iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15532
// Sweeps n over [1, 8] with m = 6, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester(tile).m(6).n(cols).k(8).iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15548
// Sweeps k over [1, 7] with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15563
// Sweeps k over [1, 7] against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583
// Sweeps k over [9, 15] with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15598
// Sweeps k over [9, 15] against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618
// Sweeps k = 16, 24, ..., 80 with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15633
// Sweeps k = 16, 24, ..., 80 against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15670
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15688
// Sweeps n over [9, 15], k = 1, 10, ..., 37 and m over [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15725
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15743
// Sweeps n = 16, 24, k = 1, 10, ..., 37 and m over [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763
// Sweeps k = 1, 10, ..., 37 with m = 6, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15779
// Sweeps k = 1, 10, ..., 37 against every n in [1, 8] and m in [1, 6],
// with ks(3) and iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).ks(3).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15818
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15836
// Sweeps k = 1, 10, ..., 37 against every n in [1, 8] and m in [1, 6],
// with cm_stride(11) and iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).cm_stride(11).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857
// Sweeps k = 1, 10, ..., 37 with m = 6, n = 8, ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3).a_offset(251)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15874
// Sweeps k = 1, 10, ..., 37 and zero_index over [0, 5] with m = 6, n = 8,
// ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 6; ++zero_row) {
      GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3).a_offset(251).zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15894
// Single Test() call with m = 6, n = 8, k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15908
// Single Test() call with m = 6, n = 8, k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15922
// Single Test() call with m = 6, n = 8, k = 8 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15936 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937
15938
15939 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with m = 6, n = 8 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15952
// Single Test() call with m = 6, n = 8, k = 8 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15966
// Sweeps every n in [1, 8] and m in [1, 6] at k = 8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester(tile).m(rows).n(cols).k(8).iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15984
// Sweeps m over [1, 6] with n = 8, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester(tile).m(rows).n(8).k(8).iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16000
// Sweeps n over [1, 8] with m = 6, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester(tile).m(6).n(cols).k(8).iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16016
// Sweeps k over [1, 7] with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16031
// Sweeps k over [1, 7] against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051
// Sweeps k over [9, 15] with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16066
// Sweeps k over [9, 15] against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086
// Sweeps k = 16, 24, ..., 80 with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16101
// Sweeps k = 16, 24, ..., 80 against every n in [1, 8] and m in [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16138
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16156
// Sweeps n over [9, 15], k = 1, 10, ..., 37 and m over [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16193
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16211
// Sweeps n = 16, 24, k = 1, 10, ..., 37 and m over [1, 6],
// with iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// Sweeps k = 1, 10, ..., 37 with m = 6, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16247
// Sweeps k = 1, 10, ..., 37 against every n in [1, 8] and m in [1, 6],
// with ks(3) and iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).ks(3).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 with m = 6 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16286
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 with m = 6 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester(tile).m(6).n(cols).k(depth).ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16304
// Sweeps k = 1, 10, ..., 37 against every n in [1, 8] and m in [1, 6],
// with cm_stride(11) and iterations(1) for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester(tile).m(rows).n(cols).k(depth).cm_stride(11).iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325
// Sweeps k = 1, 10, ..., 37 with m = 6, n = 8, ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3).a_offset(251)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
16342
// Sweeps k = 1, 10, ..., 37 and zero_index over [0, 5] with m = 6, n = 8,
// ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  const auto tile = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 6; ++zero_row) {
      GemmMicrokernelTester(tile).m(6).n(8).k(depth).ks(3).a_offset(251).zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
16362
// Single Test() call with m = 6, n = 8, k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
16376
// Single Test() call with m = 6, n = 8, k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
16390
// Single Test() call with m = 6, n = 8, k = 8 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8).cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
16404 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16405
16406
16407 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run on a full 6x8 tile at k=8; no optional tester settings applied.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16420
// Single run on a full 6x8 tile at k=8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16434
// At k=8, sweeps n over 1..8 (outer) and m over 1..6 (inner) with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16452
// At k=8 and n=8, sweeps m over 1..6 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16468
// At k=8 and m=6, sweeps n over 1..8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16484
// Full 6x8 tile, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16499
// Sweeps k over 1..7, n over 1..8 and m over 1..6 (nested in that order)
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519
// Full 6x8 tile, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16534
// Sweeps k over 9..15, n over 1..8 and m over 1..6 (nested in that order)
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554
// Full 6x8 tile, sweeping k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16569
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..6 (nested in that
// order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589
// With m=6, sweeps n over 9..15 (outer) and k over 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16606
// With m=6 and cn_stride(11), sweeps n over 9..15 (outer) and k over
// 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16624
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 and m over 1..6 (nested in
// that order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644
// With m=6, runs n = 16 and 24 (outer) against k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16661
// With m=6 and cn_stride(11), runs n = 16 and 24 (outer) against
// k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16679
// Runs n = 16 and 24, k = 1, 10, 19, 28, 37 and m over 1..6 (nested in that
// order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699
// Full 6x8 tile with ks=3, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16715
// With ks=3 and iterations(1), sweeps k over 1, 10, 19, 28, 37, n over 1..8
// and m over 1..6 (nested in that order).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736
// With m=6 and ks=3, sweeps n over 9..15 (outer) and k over
// 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16754
// With m=6 and ks=3, runs n = 16 and 24 (outer) against
// k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16772
// With cm_stride(11) and iterations(1), sweeps k over 1, 10, 19, 28, 37,
// n over 1..8 and m over 1..6 (nested in that order).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793
// Full 6x8 tile with ks=3 and a_offset=251, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3)
        .a_offset(251)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16810
// Full 6x8 tile with ks=3 and a_offset=251: sweeps k over 1, 10, 19, 28, 37
// and zero_index over 0..5.
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(8).k(k_size)
          .ks(3)
          .a_offset(251)
          .zero_index(zi)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16830
// Single run on a full 6x8 tile at k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16844
// Single run on a full 6x8 tile at k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16858
// Single run on a full 6x8 tile at k=8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16872 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
16873
16874
16875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on a full 6x16 tile at k=8; no optional tester settings applied.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
16888
// Single run on a full 6x16 tile at k=8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
16902
// At k=8, sweeps n over 1..16 (outer) and m over 1..6 (inner) with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16920
// At k=8 and n=16, sweeps m over 1..6 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16936
// At k=8 and m=6, sweeps n over 1..16 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16952
// Full 6x16 tile, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16967
// Sweeps k over 1..7, n over 1..16 and m over 1..6 (nested in that order)
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987
// Full 6x16 tile, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
17002
// Sweeps k over 9..15, n over 1..16 and m over 1..6 (nested in that order)
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022
// Full 6x16 tile, sweeping k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
17037
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over 1..6 (nested in that
// order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057
// With m=6, sweeps n over 17..31 (outer) and k over 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17074
// With m=6 and cn_stride(19), sweeps n over 17..31 (outer) and k over
// 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17092
// Sweeps n over 17..31, k over 1, 10, 19, 28, 37 and m over 1..6 (nested in
// that order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112
// With m=6, runs n = 32 and 48 (outer) against k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17129
// With m=6 and cn_stride(19), runs n = 32 and 48 (outer) against
// k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17147
// Runs n = 32 and 48, k = 1, 10, 19, 28, 37 and m over 1..6 (nested in that
// order) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167
// Full 6x16 tile with ks=3, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
17183
// With ks=3 and iterations(1), sweeps k over 1, 10, 19, 28, 37, n over 1..16
// and m over 1..6 (nested in that order).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204
// With m=6 and ks=3, sweeps n over 17..31 (outer) and k over
// 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17222
// With m=6 and ks=3, runs n = 32 and 48 (outer) against
// k = 1, 10, 19, 28, 37 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17240
// With cm_stride(19) and iterations(1), sweeps k over 1, 10, 19, 28, 37,
// n over 1..16 and m over 1..6 (nested in that order).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261
// Full 6x16 tile with ks=3 and a_offset=251, sweeping k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .ks(3)
        .a_offset(251)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
17278
// Full 6x16 tile with ks=3 and a_offset=251: sweeps k over 1, 10, 19, 28, 37
// and zero_index over 0..5.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(16).k(k_size)
          .ks(3)
          .a_offset(251)
          .zero_index(zi)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17298
// Single run on a full 6x16 tile at k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
17312
// Single run on a full 6x16 tile at k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
17326
// Single run on a full 6x16 tile at k=8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
17340 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17341
17342
17343 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run on a full 6x16 tile at k=8; no optional tester settings applied.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17356
// Single run on a full 6x16 tile at k=8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17370
// At k=8, sweeps n over 1..16 (outer) and m over 1..6 (inner) with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17388
// At k=8 and n=16, sweeps m over 1..6 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17404
// At k=8 and m=6, sweeps n over 1..16 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17420
// Full 6x16 tile, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17435
// Sweeps k over 1..7, n over 1..16 and m over 1..6 (nested in that order)
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455
// Full 6x16 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17470
// Every (k, n, m) combination with k in 9..15, n in 1..16 and m in 1..6;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490
// Full 6x16 tile with k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17505
// Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..16 and
// m in 1..6; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525
// m = 6 with n swept over 17..31 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17542
// m = 6 with n swept over 17..31 and k over 1, 10, ..., 37, with
// cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17560
// Every (n, k, m) combination with n in 17..31, k in 1, 10, ..., 37 and
// m in 1..6; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580
// m = 6 with n in {32, 48} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17597
// m = 6 with n in {32, 48} and k over 1, 10, ..., 37, with cn_stride
// set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17615
// Every (n, k, m) combination with n in {32, 48}, k in 1, 10, ..., 37 and
// m in 1..6; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635
// Full 6x16 tile with ks = 3 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(cur_k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17651
// Every (k, n, m) combination with k in 1, 10, ..., 37, n in 1..16 and
// m in 1..6, all with ks = 3; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// m = 6 and ks = 3 with n swept over 17..31 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17690
// m = 6 and ks = 3 with n in {32, 48} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17708
// Every (k, n, m) combination with k in 1, 10, ..., 37, n in 1..16 and
// m in 1..6, all with cm_stride set to 19; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729
// Full 6x16 tile with ks = 3 and a_offset = 251, k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(cur_k)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17746
// Full 6x16 tile with ks = 3 and a_offset = 251; for each k in
// 1, 10, ..., 37 the zero_index is swept over 0..5.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 6; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(16).k(cur_k)
        .ks(3)
        .a_offset(251)
        .zero_index(cur_mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17766
// Full 6x16 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17780
// Full 6x16 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17794
// Full 6x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
17809
17810
17811 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Full 8x8 tile at k = 8 on the 8x8c4 NEON dot-product kernel.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17824
// Full 8x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17838
// Every (n, m) combination with n in 1..8 and m in 1..8 at k = 8;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17856
// m swept over 1..8 while n = 8 and k = 8 stay fixed; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17872
// n swept over 1..8 while m = 8 and k = 8 stay fixed; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17888
// Full 8x8 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17903
// Every (k, n, m) combination with k in 1..7, n in 1..8 and m in 1..8;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923
// Full 8x8 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17938
// Every (k, n, m) combination with k in 9..15, n in 1..8 and m in 1..8;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958
// Full 8x8 tile with k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17973
// Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..8 and
// m in 1..8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993
// m = 8 with n swept over 9..15 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18010
// m = 8 with n swept over 9..15 and k over 1, 10, ..., 37, with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18028
// Every (n, k, m) combination with n in 9..15, k in 1, 10, ..., 37 and
// m in 1..8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048
// m = 8 with n in {16, 24} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18065
// m = 8 with n in {16, 24} and k over 1, 10, ..., 37, with cn_stride
// set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18083
// Every (n, k, m) combination with n in {16, 24}, k in 1, 10, ..., 37 and
// m in 1..8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103
// Full 8x8 tile with ks = 3 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
18119
// Every (k, n, m) combination with k in 1, 10, ..., 37, n in 1..8 and
// m in 1..8, all with ks = 3; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140
// m = 8 and ks = 3 with n swept over 9..15 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18158
// m = 8 and ks = 3 with n in {16, 24} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18176
// Every (k, n, m) combination with k in 1, 10, ..., 37, n in 1..8 and
// m in 1..8, all with cm_stride set to 11; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 8; ++cur_m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197
// Full 8x8 tile with ks = 3 and a_offset = 331, k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(cur_k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
18214
// Full 8x8 tile with ks = 3 and a_offset = 331; for each k in
// 1, 10, ..., 37 the zero_index is swept over 0..7.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 8; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(8).k(cur_k)
        .ks(3)
        .a_offset(331)
        .zero_index(cur_mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18234
// Full 8x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18248
// Full 8x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18262
// Full 8x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18276 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
18277
18278
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8 on the 1x4c2 SSE2 ld64 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18292
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18306
// Every (n, m) combination with n in 1..4 and m in 1..1 at k = 8;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18324
// m swept over 1..1 (a single value) with n = 4 and k = 8; one
// iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18340
// n swept over 1..4 while m = 1 and k = 8 stay fixed; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18356
// Full 1x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18371
// Every (k, n, m) combination with k in 1..7, n in 1..4 and m in 1..1;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391
// Full 1x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18406
// Every (k, n, m) combination with k in 9..15, n in 1..4 and m in 1..1;
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426
// Full 1x4 tile with k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18441
// Every (k, n, m) combination with k in 16, 24, ..., 80, n in 1..4 and
// m in 1..1; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461
// m = 1 with n swept over 5..7 and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18478
// m = 1 with n swept over 5..7 and k over 1, 10, ..., 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18496
// n_gt_4_subtile: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m over 1..1;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516
// n_div_4: n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18533
// n_div_4_strided_cn: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18551
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m over 1..1;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571
// small_kernel: k in {1, 10, 19, 28, 37} with m = 1, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18587
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n over 1..4, m over 1..1,
// ks set to 3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608
// n_gt_4_small_kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m = 1,
// with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18626
// n_div_4_small_kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1,
// with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18644
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n over 1..4, m over 1..1,
// cm_stride set to 7; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665
// a_offset: k in {1, 10, 19, 28, 37} with m = 1, n = 4, ks = 3 and
// a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18682
// zero: k in {1, 10, 19, 28, 37} with m = 1, n = 4, ks = 3, a_offset = 43;
// zero_index takes every value in [0, 1), i.e. only 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18702
// qmin: single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18716
// qmax: single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18730
// strided_cm: single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745
18746
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 1x4c2 SSE4.1 kernel with m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18760
// strided_cn: single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18774
// k_eq_8_subtile: k = 8 with n over 1..4 and m over 1..1; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18792
// k_eq_8_subtile_m: k = 8, n = 4, m over 1..1; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18808
// k_eq_8_subtile_n: k = 8, m = 1, n over 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18824
// k_lt_8: k over 1..7 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18839
// k_lt_8_subtile: k over 1..7, n over 1..4, m over 1..1; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859
// k_gt_8: k over 9..15 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18874
// k_gt_8_subtile: k over 9..15, n over 1..4, m over 1..1; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894
// k_div_8: k in {16, 24, ..., 80} with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18909
// k_div_8_subtile: k in {16, 24, ..., 80}, n over 1..4, m over 1..1;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929
// n_gt_4: n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18946
// n_gt_4_strided_cn: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m = 1,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18964
// n_gt_4_subtile: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m over 1..1;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984
// n_div_4: n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19001
// n_div_4_strided_cn: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19019
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m over 1..1;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039
// small_kernel: k in {1, 10, 19, 28, 37} with m = 1, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19055
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n over 1..4, m over 1..1,
// ks set to 3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076
// n_gt_4_small_kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m = 1,
// with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19094
// n_div_4_small_kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1,
// with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19112
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n over 1..4, m over 1..1,
// cm_stride set to 7; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133
// a_offset: k in {1, 10, 19, 28, 37} with m = 1, n = 4, ks = 3 and
// a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19150
// zero: k in {1, 10, 19, 28, 37} with m = 1, n = 4, ks = 3, a_offset = 43;
// zero_index takes every value in [0, 1), i.e. only 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19170
// qmin: single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19184
// qmax: single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19198
// strided_cm: single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19212 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213
19214
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 3x4c2 SSE2 kernel with m = 3, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
19228
// strided_cn: single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
19242
// k_eq_8_subtile: k = 8 with n over 1..4 and m over 1..3; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19260
// k_eq_8_subtile_m: k = 8, n = 4, m over 1..3; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19276
// k_eq_8_subtile_n: k = 8, m = 3, n over 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19292
// k_lt_8: k over 1..7 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19307
// k_lt_8_subtile: k over 1..7, n over 1..4, m over 1..3; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327
// k_gt_8: k over 9..15 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19342
// k_gt_8_subtile: k over 9..15, n over 1..4, m over 1..3; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362
// k_div_8: k in {16, 24, ..., 80} with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19377
// k_div_8_subtile: k in {16, 24, ..., 80}, n over 1..4, m over 1..3;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397
// n_gt_4: n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19414
// n_gt_4_strided_cn: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m = 3,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19432
// n_gt_4_subtile: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m over 1..3;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452
// n_div_4: n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19469
// n_div_4_strided_cn: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 3,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19487
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m over 1..3;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507
// small_kernel: k in {1, 10, 19, 28, 37} with m = 3, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19523
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n over 1..4, m over 1..3,
// ks set to 3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544
// n_gt_4_small_kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m = 3,
// with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19562
// ks set to 3; n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19580
// cm_stride of 7 (larger than nr = 4); k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3;
// each combination is run with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 4; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601
// Full 3x4 tile with ks set to 3 and a_offset set to 127; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
19618
// Same setup as the a_offset test (ks 3, a_offset 127), additionally sweeping
// zero_index over 0..2 for each k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t zi = 0; zi <= 2; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19638
// Full 3x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
19652
// Full 3x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
19666
// Full 3x4 tile, k = 8, with cm_stride of 7 (larger than nr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
19680 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681
19682
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
19696
// Full 4x4 tile, k = 8, with cn_stride of 7 (larger than nr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
19710
// k = 8; n in 1..4 and m in 1..4, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc < 5; ++nc) {
    for (uint32_t mc = 1; mc < 5; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19728
// k = 8, n = 4; m in 1..4, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc < 5; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19744
// k = 8, m = 4; n in 1..4, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc < 5; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19760
// Full 4x4 tile; k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19775
// k in 1..7, n in 1..4, m in 1..4; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795
// Full 4x4 tile; k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19810
// k in 9..15, n in 1..4, m in 1..4; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830
// Full 4x4 tile; k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc < 81; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19845
// k over the multiples of 8 from 16 to 80, n in 1..4, m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc < 81; kc += 8) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865
// n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19882
// cn_stride of 7; n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19900
// n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920
// n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19937
// cn_stride of 7; n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19955
// n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975
// Full 4x4 tile with ks set to 3; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
19991
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012
// ks set to 3; n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20030
// ks set to 3; n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20048
// cm_stride of 7 (larger than nr = 4); k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069
// Full 4x4 tile with ks set to 3 and a_offset set to 163; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20086
// Same setup as the a_offset test (ks 3, a_offset 163), additionally sweeping
// zero_index over 0..3 for each k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20106
// Full 4x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20120
// Full 4x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20134
// Full 4x4 tile, k = 8, with cm_stride of 7 (larger than nr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20148 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149
20150
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20164
// Full 1x4 tile, k = 8, with cn_stride of 7 (larger than nr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20178
// k = 8; n in 1..4, one iteration per case. The m loop covers only m = 1
// because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc < 5; ++nc) {
    for (uint32_t mc = 1; mc < 2; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20196
// k = 8, n = 4, one iteration. The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc < 2; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20212
// k = 8, m = 1; n in 1..4, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc < 5; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20228
// Full 1x4 tile; k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20243
// k in 1..7, n in 1..4, one iteration per case. The m loop covers only m = 1
// because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263
// Full 1x4 tile; k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20278
// k in 9..15, n in 1..4, one iteration per case. The m loop covers only m = 1
// because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298
// Full 1x4 tile; k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc < 81; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20313
// k over the multiples of 8 from 16 to 80, n in 1..4, one iteration per case.
// The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc < 81; kc += 8) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333
// n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20350
// cn_stride of 7; n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20368
// n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, one iteration per case.
// The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388
// n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20405
// cn_stride of 7; n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20423
// n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, one iteration
// per case. The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443
// Full 1x4 tile with ks set to 3; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20459
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..4, one iteration per case.
// The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480
// ks set to 3; n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20498
// ks set to 3; n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20516
// cm_stride of 7 (larger than nr = 4); k in {1, 10, 19, 28, 37}, n in 1..4,
// one iteration per case. The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537
// Full 1x4 tile with ks set to 3 and a_offset set to 43; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20554
// Same setup as the a_offset test (ks 3, a_offset 43) with zero_index set;
// the zero_index loop covers only 0 because mr is 1. k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t zi = 0; zi <= 0; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20574
// Full 1x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20588
// Full 1x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20602
// Full 1x4 tile, k = 8, with cm_stride of 7 (larger than nr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20616 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617
20618
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one tester run with m = 3, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20632
// strided_cn: one tester run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20646
// k_eq_8_subtile: with k = 8, visits every (n, m) pair for n in 1..4 and
// m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20664
// k_eq_8_subtile_m: with n = 4 and k = 8, sweeps m over 1..3, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20680
// k_eq_8_subtile_n: with m = 3 and k = 8, sweeps n over 1..4, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20696
// k_lt_8: with m = 3 and n = 4, sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20711
// k_lt_8_subtile: for each k in 1..7, visits every (n, m) pair for n in 1..4
// and m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731
// k_gt_8: with m = 3 and n = 4, sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20746
// k_gt_8_subtile: for each k in 9..15, visits every (n, m) pair for n in 1..4
// and m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766
// k_div_8: with m = 3 and n = 4, sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20781
// k_div_8_subtile: for each k in 16, 24, ..., 80, visits every (n, m) pair for
// n in 1..4 and m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801
// n_gt_4: with m = 3, sweeps n over 5..7 and, for each n, k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20818
// n_gt_4_strided_cn: with m = 3 and cn_stride = 7, sweeps n over 5..7 and, for
// each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20836
// n_gt_4_subtile: sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..3
// (innermost), with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856
// n_div_4: with m = 3, uses n = 8 and n = 12 and, for each n, k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20873
// n_div_4_strided_cn: with m = 3 and cn_stride = 7, uses n = 8 and n = 12 and,
// for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20891
// n_div_4_subtile: uses n = 8 and n = 12, k over 1, 10, 19, 28, 37 and m over
// 1..3 (innermost), with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911
// small_kernel: with m = 3, n = 4 and ks = 3, sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20927
// small_kernel_subtile: with ks = 3, sweeps k over 1, 10, 19, 28, 37, n over
// 1..4 and m over 1..3 (innermost), with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948
// n_gt_4_small_kernel: with m = 3 and ks = 3, sweeps n over 5..7 and, for each
// n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20966
// n_div_4_small_kernel: with m = 3 and ks = 3, uses n = 8 and n = 12 and, for
// each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20984
// strided_cm_subtile: with cm_stride = 7, sweeps k over 1, 10, 19, 28, 37, n
// over 1..4 and m over 1..3 (innermost), with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005
// a_offset: with m = 3, n = 4, ks = 3 and a_offset = 127, sweeps k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21022
// zero: with m = 3, n = 4, ks = 3 and a_offset = 127, sweeps k over
// 1, 10, 19, 28, 37 and, for each k, zero_index over 0..2.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21042
// qmin: one tester run with m = 3, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21056
// qmax: one tester run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21070
// strided_cm: one tester run with m = 3, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085
21086
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one tester run with m = 1, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21100
// strided_cn: one tester run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21114
// k_eq_8_subtile: with k = 8 and m = 1, sweeps n over 1..4, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21132
// k_eq_8_subtile_m: one tester run with m = 1, n = 4, k = 8 and iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21148
// k_eq_8_subtile_n: with m = 1 and k = 8, sweeps n over 1..4, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21164
// k_lt_8: with m = 1 and n = 4, sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21179
// k_lt_8_subtile: with m = 1, sweeps k over 1..7 and, for each k, n over 1..4,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21199
// k_gt_8: with m = 1 and n = 4, sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21214
// k_gt_8_subtile: with m = 1, sweeps k over 9..15 and, for each k, n over 1..4,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21234
// k_div_8: with m = 1 and n = 4, sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21249
// k_div_8_subtile: with m = 1, sweeps k over 16, 24, ..., 80 and, for each k,
// n over 1..4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21269
// n_gt_4: with m = 1, sweeps n over 5..7 and, for each n, k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21286
// n_gt_4_strided_cn: with m = 1 and cn_stride = 7, sweeps n over 5..7 and, for
// each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21304
// n_gt_4_subtile: with m = 1, sweeps n over 5..7 and, for each n, k over
// 1, 10, 19, 28, 37, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21324
// n_div_4: with m = 1, uses n = 8 and n = 12 and, for each n, k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21341
// n_div_4_strided_cn: with m = 1 and cn_stride = 7, uses n = 8 and n = 12 and,
// for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21359
// n_div_4_subtile: with m = 1, uses n = 8 and n = 12 and, for each n, k over
// 1, 10, 19, 28, 37, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21379
// small_kernel: with m = 1, n = 4 and ks = 3, sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21395
// small_kernel_subtile: with m = 1 and ks = 3, sweeps k over 1, 10, 19, 28, 37
// and, for each k, n over 1..4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21416
// n_gt_4_small_kernel: with m = 1 and ks = 3, sweeps n over 5..7 and, for each
// n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21434
// n_div_4_small_kernel: with m = 1 and ks = 3, uses n = 8 and n = 12 and, for
// each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21452
// strided_cm_subtile: with m = 1 and cm_stride = 7, sweeps k over
// 1, 10, 19, 28, 37 and, for each k, n over 1..4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t rows = 1;  // m takes the single value 1 (the m range is 1..1)
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21473
// a_offset: with m = 1, n = 4, ks = 3 and a_offset = 43, sweeps k over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21490
// zero: for k = 1, 10, 19, 28, 37 runs the tester with ks = 3, a_offset = 43
// and zero_index = 0 on an m = 1, n = 4 problem.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  const uint32_t zero_row = 0;  // zero_index takes the single value 0 (range 0..0)
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21510
// qmin: one tester run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21524
// qmax: one tester run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21538
// strided_cm: one tester run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21552 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553
21554
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one tester run with m = 3, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21568
// strided_cn: one tester run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21582
// k_eq_8_subtile: with k = 8, visits every (n, m) pair for n in 1..4 and
// m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21600
// k_eq_8_subtile_m: with n = 4 and k = 8, sweeps m over 1..3, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21616
// k_eq_8_subtile_n: with m = 3 and k = 8, sweeps n over 1..4, with iterations
// set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21632
// k_lt_8: with m = 3 and n = 4, sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21647
// k_lt_8_subtile: for each k in 1..7, visits every (n, m) pair for n in 1..4
// and m in 1..3, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667
// k_gt_8: with m = 3 and n = 4, sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21682
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702
// m=3, n=4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21717
// Sweeps k over multiples of 8 in [16, 80], n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737
// m=3: sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9, up to 40).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21754
// m=3, cn_stride=7: sweeps n over [5, 7] and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21772
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792
// m=3: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21809
// m=3, cn_stride=7: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21827
// Sweeps n over {8, 12}, k over 1, 10, ..., 37 and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847
// m=3, n=4, ks=3: sweeps k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21863
// ks=3: sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884
// m=3, ks=3: sweeps n over [5, 7] and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21902
// m=3, ks=3: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21920
// cm_stride=7: sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941
// m=3, n=4, ks=3, a_offset=127: sweeps k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21958
// m=3, n=4, ks=3, a_offset=127: sweeps k over 1, 10, ..., 37 and
// zero_index over [0, 2].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21978
// Single run with m=3, n=4, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21992
// Single run with m=3, n=4, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22006
// Single run with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22020 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021
22022
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=4, k=8 and no stride or clamp overrides.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22036
// Single run with m=3, n=4, k=8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22050
// k=8: sweeps n over [1, 4] and m over [1, 3], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22068
// k=8, n=4: sweeps m over [1, 3] with iterations(1) for each height.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22084
// k=8, m=3: sweeps n over [1, 4] with iterations(1) for each width.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22100
// m=3, n=4: sweeps k over [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22115
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135
// m=3, n=4: sweeps k over [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22150
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170
// m=3, n=4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22185
// Sweeps k over multiples of 8 in [16, 80], n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205
// m=3: sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9, up to 40).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22222
// m=3, cn_stride=7: sweeps n over [5, 7] and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22240
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260
// m=3: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22277
// m=3, cn_stride=7: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22295
// Sweeps n over {8, 12}, k over 1, 10, ..., 37 and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315
// m=3, n=4, ks=3: sweeps k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22331
// ks=3: sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352
// m=3, ks=3: sweeps n over [5, 7] and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22370
// m=3, ks=3: sweeps n over {8, 12} and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22388
// cm_stride=7: sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409
// m=3, n=4, ks=3, a_offset=127: sweeps k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22426
// m=3, n=4, ks=3, a_offset=127: sweeps k over 1, 10, ..., 37 and
// zero_index over [0, 2].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22446
// Single run with m=3, n=4, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22460
// Single run with m=3, n=4, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22474
// Single run with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489
22490
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=4, n=4, k=8 and no stride or clamp overrides.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22504
// Single run with m=4, n=4, k=8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22518
// k=8: sweeps n over [1, 4] and m over [1, 4], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22536
// k=8, n=4: sweeps m over [1, 4] with iterations(1) for each height.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22552
// k=8, m=4: sweeps n over [1, 4] with iterations(1) for each width.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22568
// m=4, n=4: sweeps k over [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22583
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 4], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603
// m=4, n=4: sweeps k over [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22618
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 4], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638
// m=4, n=4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22653
// Sweeps k over multiples of 8 in [16, 80], n over [1, 4] and m over [1, 4],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673
// m=4: sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9, up to 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22690
// m=4, cn_stride=7: sweeps n over [5, 7] and k over 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22708
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 4],
// with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728
// n_div_4: m = 4 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22745
// n_div_4_strided_cn: m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22763
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and every m in
// [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783
// small_kernel: 4x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
22799
// small_kernel_subtile: ks set to 3, k = 1, 10, 19, 28, 37, every m and n
// in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820
// n_gt_4_small_kernel: m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with ks
// set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22838
// n_div_4_small_kernel: m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22856
// strided_cm_subtile: cm_stride set to 7, k = 1, 10, 19, 28, 37, every m
// and n in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877
// a_offset: 4x4 tile with ks set to 3 and a_offset set to 163, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
22894
// zero: same setup as a_offset (ks = 3, a_offset = 163), additionally
// sweeping zero_index over 0..3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
22914
// qmin: single 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
22928
// qmax: single 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
22942
// strided_cm: single 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
22956 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957
22958
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single 4x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
22972
// strided_cn: single 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
22986
// k_eq_8_subtile: k = 8 with every m and n in [1, 4]; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23004
// k_eq_8_subtile_m: k = 8, n = 4, every m in [1, 4]; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23020
// k_eq_8_subtile_n: k = 8, m = 4, every n in [1, 4]; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23036
// k_lt_8: 4x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23051
// k_lt_8_subtile: every k in [1, 7] crossed with every m and n in [1, 4];
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// k_gt_8: 4x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23086
// k_gt_8_subtile: every k in [9, 15] crossed with every m and n in [1, 4];
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106
// k_div_8: 4x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23121
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with every m and n in
// [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141
// n_gt_4: m = 4 with n = 5..7 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23158
// n_gt_4_strided_cn: m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23176
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37 and every m in [1, 4];
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196
// n_div_4: m = 4 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23213
// n_div_4_strided_cn: m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23231
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and every m in
// [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251
// small_kernel: 4x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23267
// small_kernel_subtile: ks set to 3, k = 1, 10, 19, 28, 37, every m and n
// in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288
// n_gt_4_small_kernel: m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with ks
// set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23306
// n_div_4_small_kernel: m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23324
// strided_cm_subtile: cm_stride set to 7, k = 1, 10, 19, 28, 37, every m
// and n in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345
// a_offset: 4x4 tile with ks set to 3 and a_offset set to 163, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23362
// zero: same setup as a_offset (ks = 3, a_offset = 163), additionally
// sweeping zero_index over 0..3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23382
// qmin: single 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
23396
// qmax: single 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
23410
// strided_cm: single 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
23424 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425
23426
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single 3x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
23440
// strided_cn: single 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
23454
// k_eq_8_subtile: k = 8 with every m in [1, 3] and n in [1, 4]; a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23472
// k_eq_8_subtile_m: k = 8, n = 4, every m in [1, 3]; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23488
// k_eq_8_subtile_n: k = 8, m = 3, every n in [1, 4]; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23504
// k_lt_8: 3x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23519
// k_lt_8_subtile: every k in [1, 7] crossed with every m in [1, 3] and n
// in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539
// k_gt_8: 3x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23554
// k_gt_8_subtile: every k in [9, 15] crossed with every m in [1, 3] and n
// in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574
// k_div_8: 3x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23589
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with every m in [1, 3] and
// n in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609
// n_gt_4: m = 3 with n = 5..7 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23626
// n_gt_4_strided_cn: m = 3, n = 5..7, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23644
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37 and every m in [1, 3];
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664
// n_div_4: m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23681
// n_div_4_strided_cn: m = 3, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23699
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and every m in
// [1, 3]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719
// small_kernel: 3x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
23735
// small_kernel_subtile: ks set to 3, k = 1, 10, 19, 28, 37, every m in
// [1, 3] and n in [1, 4]; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756
// n_gt_4_small_kernel: m = 3, n = 5..7, k = 1, 10, 19, 28, 37, with ks
// set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23774
// n_div_4_small_kernel: m = 3, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
23792
// Every (m, n) in [1..3] x [1..4] with cm_stride(7), one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813
// m=3, n=4 with ks(3) and a_offset(127); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc).ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23830
// m=3, n=4 with ks(3), a_offset(127) and zero_index in 0..2;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(kc).ks(3).a_offset(127).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23850
// m=3, n=4, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23864
// m=3, n=4, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23878
// m=3, n=4, k=8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23892 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893
23894
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=4, n=4, k=8 with no further options set.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23908
// m=4, n=4, k=8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23922
// k=8 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23940
// k=8, n=4 for m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23956
// k=8, m=4 for n in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23972
// m=4, n=4 for k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23987
// k in 1..7 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007
// m=4, n=4 for k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24022
// k in 9..15 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042
// m=4, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24057
// k = 16, 24, ..., 80 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077
// m=4 with n in 5..7; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24094
// m=4 with n in 5..7 and cn_stride(7); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24112
// n in 5..7 and m in 1..4, one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132
// m=4 with n in {8, 12}; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24149
// m=4 with n in {8, 12} and cn_stride(7); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24167
// n in {8, 12} and m in 1..4, one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187
// m=4, n=4 with ks(3); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24203
// Every (m, n) in [1..4] x [1..4] with ks(3), one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224
// m=4 with ks(3) and n in 5..7; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24242
// m=4 with ks(3) and n in {8, 12}; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24260
// Every (m, n) in [1..4] x [1..4] with cm_stride(7), one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281
// m=4, n=4 with ks(3) and a_offset(163); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc).ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24298
// m=4, n=4 with ks(3), a_offset(163) and zero_index in 0..3;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(kc).ks(3).a_offset(163).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24318
// m=4, n=4, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24332
// m=4, n=4, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24346
// m=4, n=4, k=8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24360 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361
24362
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=4, n=4, k=8 with no further options set.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24376
// m=4, n=4, k=8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24390
// k=8 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24408
// k=8, n=4 for m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24424
// k=8, m=4 for n in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24440
// m=4, n=4 for k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24455
// k in 1..7 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475
// m=4, n=4 for k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24490
// k in 9..15 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510
// m=4, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24525
// k = 16, 24, ..., 80 for every (m, n) in [1..4] x [1..4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545
// m=4 with n in 5..7; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24562
// m=4 with n in 5..7 and cn_stride(7); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24580
// n in 5..7 and m in 1..4, one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600
// m=4 with n in {8, 12}; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24617
// m=4 with n in {8, 12} and cn_stride(7); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24635
// n in {8, 12} and m in 1..4, one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655
// m=4, n=4 with ks(3); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24671
// Every (m, n) in [1..4] x [1..4] with ks(3), one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692
// m=4 with ks(3) and n in 5..7; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24710
// m=4 with ks(3) and n in {8, 12}; k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24728
// Every (m, n) in [1..4] x [1..4] with cm_stride(7), one iteration each;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749
// m=4, n=4 with ks(3) and a_offset(163); k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc).ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24766
// m=4, n=4 with ks(3), a_offset(163) and zero_index in 0..3;
// k steps through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(kc).ks(3).a_offset(163).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24786
// m=4, n=4, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24800
// m=4, n=4, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24814
// m=4, n=4, k=8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24828 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829
24830
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=1, n=4, k=8 with no further options set.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24844
// Full tile (m = 1, n = 4) at k = 8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24858
// k = 8 over every shape with n in [1, 4] (outer loop) and m in [1, 1]
// (inner loop); each shape is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24876
// k = 8 with n fixed at 4 while m sweeps [1, 1]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24892
// k = 8 with m fixed at 1 while n sweeps [1, 4]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24908
// Full tile (m = 1, n = 4) for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24923
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943
// Full tile (m = 1, n = 4) for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24958
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978
// Full tile (m = 1, n = 4) for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24993
// k = 16, 24, ..., 80 (step 8) crossed with n in [1, 4] and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013
// m = 1 with n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25030
// m = 1 with n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9),
// with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25048
// n in [5, 7] crossed with k = 1, 10, ..., 37 (step 9) and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068
// m = 1 with n = 8 and 12 (step 4), each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25085
// m = 1 with n = 8 and 12 (step 4), each crossed with k = 1, 10, ..., 37 (step 9),
// with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25103
// n = 8 and 12 (step 4) crossed with k = 1, 10, ..., 37 (step 9) and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123
// Full tile (m = 1, n = 4) with ks(3), for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25139
// ks(3) with k = 1, 10, ..., 37 (step 9) crossed with n in [1, 4] and m in [1, 1];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160
// ks(3) with m = 1 and n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25178
// ks(3) with m = 1 and n = 8 and 12 (step 4), each crossed with
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25196
// cm_stride(7) with k = 1, 10, ..., 37 (step 9) crossed with n in [1, 4] and
// m in [1, 1]; each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217
// Full tile (m = 1, n = 4) with ks(3) and a_offset(43),
// for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25234
// Full tile (m = 1, n = 4) with ks(3) and a_offset(43); for each
// k = 1, 10, ..., 37 (step 9), zero_index takes every value in [0, 0].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25254
// Full tile (m = 1, n = 4) at k = 8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25268
// Full tile (m = 1, n = 4) at k = 8 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25282
// Full tile (m = 1, n = 4) at k = 8 with cm_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297
25298
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full tile (m = 3, n = 4) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25312
// Full tile (m = 3, n = 4) at k = 8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25326
// k = 8 over every shape with n in [1, 4] (outer loop) and m in [1, 3]
// (inner loop); each shape is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25344
// k = 8 with n fixed at 4 while m sweeps [1, 3]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25360
// k = 8 with m fixed at 3 while n sweeps [1, 4]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25376
// Full tile (m = 3, n = 4) for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25391
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411
// Full tile (m = 3, n = 4) for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25426
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446
// Full tile (m = 3, n = 4) for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25461
// k = 16, 24, ..., 80 (step 8) crossed with n in [1, 4] and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481
// m = 3 with n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25498
// m = 3 with n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9),
// with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25516
// n in [5, 7] crossed with k = 1, 10, ..., 37 (step 9) and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536
// m = 3 with n = 8 and 12 (step 4), each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25553
// m = 3 with n = 8 and 12 (step 4), each crossed with k = 1, 10, ..., 37 (step 9),
// with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25571
// n = 8 and 12 (step 4) crossed with k = 1, 10, ..., 37 (step 9) and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591
// Full tile (m = 3, n = 4) with ks(3), for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25607
// ks(3) with k = 1, 10, ..., 37 (step 9) crossed with n in [1, 4] and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628
// ks(3) with m = 3 and n in [5, 7], each crossed with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25646
// ks(3) with m = 3 and n = 8 and 12 (step 4), each crossed with
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25664
// cm_stride(7) with k = 1, 10, ..., 37 (step 9) crossed with n in [1, 4] and
// m in [1, 3]; each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685
// Full tile (m = 3, n = 4) with ks(3) and a_offset(127),
// for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25702
// Full tile (m = 3, n = 4) with ks(3) and a_offset(127); for each
// k = 1, 10, ..., 37 (step 9), zero_index takes every value in [0, 2].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(cur_k)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25722
// Full tile (m = 3, n = 4) at k = 8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25736
// Full tile (m = 3, n = 4) at k = 8 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25750
// Full tile (m = 3, n = 4) at k = 8 with cm_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25764 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765
25766
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full tile (m = 3, n = 4) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25780
// Full tile (m = 3, n = 4) at k = 8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25794
// k = 8 over every shape with n in [1, 4] (outer loop) and m in [1, 3]
// (inner loop); each shape is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25812
// k = 8 with n fixed at 4 while m sweeps [1, 3]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25828
// k = 8 with m fixed at 3 while n sweeps [1, 4]; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25844
// Full tile (m = 3, n = 4) for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25859
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3];
// each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879
// Full tile (m = 3, n = 4) for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25894
// k_gt_8_subtile: for every k in [9, 16), every n in 1..4 and every m in 1..3,
// runs a single iteration of the 3x4c2s4 XOP LD64 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914
// k_div_8: runs the 3x4c2s4 XOP LD64 QC8 IGEMM ukernel on the full m=3, n=4
// tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
25929
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in 1..4 and every m in
// 1..3, runs a single iteration of the 3x4c2s4 XOP LD64 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949
// n_gt_4: for n in [5, 8) and k = 1, 10, ..., 37 (step 9), runs the 3x4c2s4
// XOP LD64 QC8 IGEMM ukernel with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
25966
// n_gt_4_strided_cn: same sweep as n in [5, 8) x k = 1, 10, ..., 37 with m=3,
// but with cn_stride set to 7 on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
25984
// n_gt_4_subtile: for n in [5, 8), k = 1, 10, ..., 37 and m in 1..3, runs a
// single iteration of the 3x4c2s4 XOP LD64 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004
// n_div_4: for n = 8 and 12 and k = 1, 10, ..., 37 (step 9), runs the 3x4c2s4
// XOP LD64 QC8 IGEMM ukernel with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26021
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, m=3, with cn_stride
// set to 7 on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26039
// n_div_4_subtile: for n = 8 and 12, k = 1, 10, ..., 37 and m in 1..3, runs a
// single iteration of the 3x4c2s4 XOP LD64 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059
// small_kernel: full m=3, n=4 tile with ks set to 3, swept over
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26075
// small_kernel_subtile: ks set to 3; sweeps k = 1, 10, ..., 37, n in 1..4 and
// m in 1..3, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096
// n_gt_4_small_kernel: ks set to 3 with m=3; sweeps n in [5, 8) and
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26114
// n_div_4_small_kernel: ks set to 3 with m=3; sweeps n = 8 and 12 and
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26132
// strided_cm_subtile: cm_stride set to 7; sweeps k = 1, 10, ..., 37, n in 1..4
// and m in 1..3, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153
// a_offset: full m=3, n=4 tile with ks set to 3 and a_offset set to 127,
// swept over k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26170
// zero: full m=3, n=4 tile with ks=3 and a_offset=127; for each
// k = 1, 10, ..., 37 the zero_index setting is swept over 0..2.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26190
// qmin: single run on the full m=3, n=4, k=8 tile with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26204
// qmax: single run on the full m=3, n=4, k=8 tile with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26218
// strided_cm: single run on the full m=3, n=4, k=8 tile with cm_stride set
// to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26232 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233
26234
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the
// full m=2, n=4 tile with k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26248
// strided_cn: single run on the full m=2, n=4, k=8 tile with cn_stride set
// to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26262
// k_eq_8_subtile: k=8 fixed; sweeps n in 1..4 and m in 1..2, one iteration
// per configuration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26280
// k_eq_8_subtile_m: n=4 and k=8 fixed; sweeps m in 1..2, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26296
// k_eq_8_subtile_n: m=2 and k=8 fixed; sweeps n in 1..4, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26312
// k_lt_8: runs the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=2,
// n=4 tile for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26327
// k_lt_8_subtile: for every k in [1, 8), every n in 1..4 and every m in 1..2,
// runs a single iteration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347
// k_gt_8: runs the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=2,
// n=4 tile for every k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26362
// k_gt_8_subtile: for every k in [9, 16), every n in 1..4 and every m in 1..2,
// runs a single iteration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382
// k_div_8: runs the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=2,
// n=4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26397
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in 1..4 and every m in
// 1..2, runs a single iteration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417
// n_gt_4: for n in [5, 8) and k = 1, 10, ..., 37 (step 9), runs the 2x4c2s4
// SSE4.1 LD128 QC8 IGEMM ukernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26434
// n_gt_4_strided_cn: n in [5, 8), k = 1, 10, ..., 37, m=2, with cn_stride set
// to 7 on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26452
// n_gt_4_subtile: for n in [5, 8), k = 1, 10, ..., 37 and m in 1..2, runs a
// single iteration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472
// n_div_4: for n = 8 and 12 and k = 1, 10, ..., 37 (step 9), runs the 2x4c2s4
// SSE4.1 LD128 QC8 IGEMM ukernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26489
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, m=2, with cn_stride
// set to 7 on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26507
// n_div_4_subtile: for n = 8 and 12, k = 1, 10, ..., 37 and m in 1..2, runs a
// single iteration of the 2x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527
// small_kernel: full m=2, n=4 tile with ks set to 3, swept over
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26543
// small_kernel_subtile: ks set to 3; sweeps k = 1, 10, ..., 37, n in 1..4 and
// m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564
// n_gt_4_small_kernel: ks set to 3 with m=2; sweeps n in [5, 8) and
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26582
// n_div_4_small_kernel: ks set to 3 with m=2; sweeps n = 8 and 12 and
// k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26600
// strided_cm_subtile: cm_stride set to 7; sweeps k = 1, 10, ..., 37, n in 1..4
// and m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621
// a_offset: full m=2, n=4 tile with ks set to 3 and a_offset set to 83,
// swept over k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26638
// zero: full m=2, n=4 tile with ks=3 and a_offset=83; for each
// k = 1, 10, ..., 37 the zero_index setting is swept over 0..1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26658
// qmin: single run on the full m=2, n=4, k=8 tile with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26672
// qmax: single run on the full m=2, n=4, k=8 tile with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26686
// strided_cm: single run on the full m=2, n=4, k=8 tile with cm_stride set
// to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701
26702
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the
// full m=4, n=4 tile with k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26716
// strided_cn: single run on the full m=4, n=4, k=8 tile with cn_stride set
// to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
26730
// k_eq_8_subtile: k=8 fixed; sweeps n in 1..4 and m in 1..4, one iteration
// per configuration of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26748
// k_eq_8_subtile_m: n=4 and k=8 fixed; sweeps m in 1..4, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26764
// k_eq_8_subtile_n: m=4 and k=8 fixed; sweeps n in 1..4, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26780
// k_lt_8: runs the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=4,
// n=4 tile for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26795
// k_lt_8_subtile: for every k in [1, 8), every n in 1..4 and every m in 1..4,
// runs a single iteration of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815
// k_gt_8: runs the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=4,
// n=4 tile for every k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26830
// k_gt_8_subtile: for every k in [9, 16), every n in 1..4 and every m in 1..4,
// runs a single iteration of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850
// k_div_8: runs the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel on the full m=4,
// n=4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
26865
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in 1..4 and every m in
// 1..4, runs a single iteration of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885
// n_gt_4: for n in [5, 8) and k = 1, 10, ..., 37 (step 9), runs the 4x4c2s4
// SSE4.1 LD128 QC8 IGEMM ukernel with m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26902
// n_gt_4_strided_cn: n in [5, 8), k = 1, 10, ..., 37, m=4, with cn_stride set
// to 7 on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
26920
// n_gt_4_subtile: for n in [5, 8), k = 1, 10, ..., 37 and m in 1..4, runs a
// single iteration of the 4x4c2s4 SSE4.1 LD128 QC8 IGEMM ukernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26957
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 4 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26975
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27011
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with ks set to 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27050
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27068
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with cm_stride set to 7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4, ks = 3 and a_offset set to 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27106
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..3 with m = 4, n = 4, ks = 3, a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27126
// Single run with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27140
// Single run with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27154
// Single run with m = 4, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27168 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169
27170
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27184
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27198
// Sweeps n = 1..4 and m = 1 (single-value loop) with k fixed at 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27216
// Sweeps m = 1 (single-value loop) with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27232
// Sweeps n = 1..4 with m = 1 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27248
// Sweeps k = 1..7 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27263
// Sweeps k = 1..7, n = 1..4 and m = 1 (single-value loop), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283
// Sweeps k = 9..15 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27298
// Sweeps k = 9..15, n = 1..4 and m = 1 (single-value loop), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318
// Sweeps k = 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27333
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1 (single-value loop), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27370
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27388
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1 (single-value loop), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27425
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27443
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1 (single-value loop), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463
// Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27479
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1 (single-value loop) with ks = 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27518
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27536
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1 (single-value loop) with cm_stride = 7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557
// Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4, ks = 3 and a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27574
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0 (single-value loop) with m = 1, n = 4, ks = 3, a_offset = 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27594
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27608
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27622
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637
27638
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27652
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27666
// Sweeps n = 1..4 and m = 1..2 with k fixed at 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27684
// Sweeps m = 1..2 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27700
// Sweeps n = 1..4 with m = 2 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27716
// Sweeps k = 1..7 with m = 2 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27731
// Sweeps k = 1..7, n = 1..4 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751
// Sweeps k = 9..15 with m = 2 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27766
// Sweeps k = 9..15, n = 1..4 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786
// Sweeps k = 16, 24, ..., 80 with m = 2 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27801
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27838
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 2 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27856
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27893
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 2 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27911
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931
// Sweeps k = 1, 10, 19, 28, 37 with m = 2, n = 4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27947
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2 with ks set to 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 2 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27986
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  // ks = 3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, m fixed at 2.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28004
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  // cm_stride = 7 over k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2],
  // one iteration per configuration.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28042
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, zero) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; zero_index takes each value
  // in [0, 2) for every k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28062
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  // Single run on the full 2x4 tile at k = 8 with qmin set to 128.
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28076
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  // Single run on the full 2x4 tile at k = 8 with qmax set to 128.
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28090
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  // Single run on the full 2x4 tile at k = 8 with cm_stride set to 7.
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105
28106
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  // Single run on the full 3x4 tile at k = 8.
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28120
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  // Single run on the full 3x4 tile at k = 8 with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28134
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  // k = 8 over every n in [1, 4] and m in [1, 3], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28152
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  // k = 8 and n = 4 while m takes each value in [1, 3], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28168
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  // k = 8 and m = 3 while n takes each value in [1, 4], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28184
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  // Full 3x4 tile for every k in [1, 7].
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28199
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  // Every k in [1, 7], n in [1, 4] and m in [1, 3], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  // Full 3x4 tile for every k in [9, 15].
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28234
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  // Every k in [9, 15], n in [1, 4] and m in [1, 3], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  // Full 3x4 tile for k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28269
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 over n in [1, 4] and m in [1, 3], one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m fixed at 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28306
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  // cn_stride = 7 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m = 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28324
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and m in [1, 3],
  // one iteration per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, m fixed at 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28361
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  // cn_stride = 7 with n in {8, 12} and k in {1, 10, 19, 28, 37}, m = 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28379
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 3],
  // one iteration per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel) {
  // Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28415
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel_subtile) {
  // ks = 3 over k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 3],
  // one iteration per configuration.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  // ks = 3 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m fixed at 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28454
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  // ks = 3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, m fixed at 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28472
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  // cm_stride = 7 over k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 3],
  // one iteration per configuration.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, a_offset) {
  // Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28510
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, zero) {
  // Full 3x4 tile with ks = 3 and a_offset = 127; zero_index takes each value
  // in [0, 3) for every k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(127);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28530
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  // Single run on the full 3x4 tile at k = 8 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28544
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  // Single run on the full 3x4 tile at k = 8 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28558
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  // Single run on the full 3x4 tile at k = 8 with cm_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28572 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573
28574
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  // Single run on the full 2x4 tile at k = 8.
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
28588
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  // Single run on the full 2x4 tile at k = 8 with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
28602
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  // k = 8 over every n in [1, 4] and m in [1, 2], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28620
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  // k = 8 and n = 4 while m takes each value in [1, 2], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28636
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  // k = 8 and m = 2 while n takes each value in [1, 4], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28652
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  // Full 2x4 tile for every k in [1, 7].
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28667
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  // Every k in [1, 7], n in [1, 4] and m in [1, 2], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  // Full 2x4 tile for every k in [9, 15].
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28702
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  // Every k in [9, 15], n in [1, 4] and m in [1, 2], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  // Full 2x4 tile for k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28737
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  // k = 16, 24, ..., 80 over n in [1, 4] and m in [1, 2], one iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28774
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  // cn_stride = 7 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m = 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28792
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and m in [1, 2],
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28829
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  // cn_stride = 7 with n in {8, 12} and k in {1, 10, 19, 28, 37}, m = 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28847
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 2],
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  // Full 2x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28883
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  // ks = 3 over k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2],
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  // ks = 3 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28922
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  // ks = 3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28940
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  // cm_stride = 7 over k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2],
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
28978
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; zero_index takes each value
  // in [0, 2) for every k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28998
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  // Single run on the full 2x4 tile at k = 8 with qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
29012
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  // Single run on the full 2x4 tile at k = 8 with qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
29026
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  // Single run on the full 2x4 tile at k = 8 with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
29040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041
29042
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=2, n=4, k=8 with no other tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29056
// Single run at m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29070
// k=8 for every (n, m) with n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29088
// k=8, n=4 for every m in [1, 2]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29104
// k=8, m=2 for every n in [1, 4]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29120
// m=2, n=4 for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29135
// Every (k, n, m) with k in [1, 8), n in [1, 4], m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155
// m=2, n=4 for every k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29170
// Every (k, n, m) with k in [9, 16), n in [1, 4], m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190
// m=2, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29205
// Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225
// m=2 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29242
// m=2, cn_stride=7 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29260
// Every (n, k, m) with n in [5, 8), k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280
// m=2 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29297
// m=2, cn_stride=7 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29315
// Every (n, k, m) with n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335
// m=2, n=4, ks=3 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29351
// ks=3 for every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372
// m=2, ks=3 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29390
// m=2, ks=3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29408
// cm_stride=7 for every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429
// m=2, n=4, ks=3, a_offset=83 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29446
// For k in {1, 10, 19, 28, 37} and each zero_index in [0, 2): m=2, n=4, ks=3, a_offset=83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29466
// Single run at m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29480
// Single run at m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29494
// Single run at m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29508 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509
29510
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=3, n=4, k=8 with no other tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29524
// Single run at m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29538
// k=8 for every (n, m) with n in [1, 4] and m in [1, 3]; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29556
// k=8, n=4 for every m in [1, 3]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29572
// k=8, m=3 for every n in [1, 4]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29588
// m=3, n=4 for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29603
// Every (k, n, m) with k in [1, 8), n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623
// m=3, n=4 for every k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29638
// Every (k, n, m) with k in [9, 16), n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658
// m=3, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29673
// Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693
// m=3 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29710
// m=3, cn_stride=7 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29728
// Every (n, k, m) with n in [5, 8), k in {1, 10, 19, 28, 37}, m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748
// m=3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29765
// m=3, cn_stride=7 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29783
// Every (n, k, m) with n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803
// m=3, n=4, ks=3 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29819
// ks=3 for every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840
// m=3, ks=3 for every n in [5, 8) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29858
// m=3, ks=3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29876
// cm_stride=7 for every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897
// m=3, n=4, ks=3, a_offset=127 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29914
// For k in {1, 10, 19, 28, 37} and each zero_index in [0, 3): m=3, n=4, ks=3, a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29934
// Single run at m=3, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29948
// Single run at m=3, n=4, k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29962
// Single run at m=3, n=4, k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29976 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977
29978
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=3, n=4, k=8 with no other tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29992
// Single run at m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30006
// k=8 for every (n, m) with n in [1, 4] and m in [1, 3]; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30024
// k=8, n=4 for every m in [1, 3]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30040
// k=8, m=3 for every n in [1, 4]; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30056
// m=3, n=4 for every k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30071
// Every (k, n, m) with k in [1, 8), n in [1, 4], m in [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile for every k in 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30106
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) in 1..3 x 1..4 for each k in 9..15; one iteration per shape.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30141
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) in 1..3 x 1..4 for k = 16, 24, ..., 80; one iteration per shape.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 (above nr = 4) with m = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30178
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 3 and cn_stride = 7, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30196
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7, k = 1, 10, ..., 37, m in 1..3; one iteration per shape.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30233
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m = 3 and cn_stride = 7, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30251
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12, k = 1, 10, ..., 37, m in 1..3; one iteration per shape.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with ks = 3, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30287
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3 over every (m, n) in 1..3 x 1..4, for k = 1, 10, ..., 37; one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 3 and ks = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30326
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m = 3 and ks = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30344
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // cm_stride = 7 over every (m, n) in 1..3 x 1..4, for k = 1, 10, ..., 37; one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with ks = 3 and a_offset = 127, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30382
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..2.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30402
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile at k = 8 with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30416
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile at k = 8 with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30430
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile at k = 8 with cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30444 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445
30446
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case: m = 3, n = 4, k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30460
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile at k = 8 with cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30474
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) in 1..3 x 1..4 at k = 8; one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30492
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // Sweep m over 1..3 with n = 4 and k = 8; one iteration per shape.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30508
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // Sweep n over 1..4 with m = 3 and k = 8; one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30524
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30539
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) in 1..3 x 1..4 for each k in 1..7; one iteration per shape.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30574
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) in 1..3 x 1..4 for each k in 9..15; one iteration per shape.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30609
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) in 1..3 x 1..4 for k = 16, 24, ..., 80; one iteration per shape.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7 (above nr = 4) with m = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30646
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7 with m = 3 and cn_stride = 7, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30664
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k = 1, 10, ..., 37, m in 1..3; one iteration per shape.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30701
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 with m = 3 and cn_stride = 7, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30719
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12, k = 1, 10, ..., 37, m in 1..3; one iteration per shape.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks = 3, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30755
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // ks = 3 over every (m, n) in 1..3 x 1..4, for k = 1, 10, ..., 37; one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7 with m = 3 and ks = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30794
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 with m = 3 and ks = 3, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30812
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // cm_stride = 7 over every (m, n) in 1..3 x 1..4, for k = 1, 10, ..., 37; one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks = 3 and a_offset = 127, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30850
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..2.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30870
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile at k = 8 with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30884
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile at k = 8 with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30898
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile at k = 8 with cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30912 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913
30914
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case: m = 2, n = 4, k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30928
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile at k = 8 with cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30942
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) in 1..2 x 1..4 at k = 8; one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30960
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep m over 1..2 with n = 4 and k = 8; one iteration per shape.
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30976
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 1..4 with m = 2 and k = 8; one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30992
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for every k in 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31007
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) in 1..2 x 1..4 for each k in 1..7; one iteration per shape.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for every k in 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31042
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) in 1..2 x 1..4 for each k in 9..15; one iteration per shape.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31077
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) in 1..2 x 1..4 for k = 16, 24, ..., 80; one iteration per shape.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 (above nr = 4) with m = 2, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31114
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 with m = 2 and cn_stride = 7, for k = 1, 10, ..., 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31132
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7, k = 1, 10, ..., 37, m in 1..2; one iteration per shape.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12 (multiples of nr = 4) with m fixed at 2;
  // k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31169
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12, k takes 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31187
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12, k takes 1, 10, 19, 28, 37, and m covers 1..2;
  // iterations is set to 1 for each combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // Fixed m = 2, n = 4 with ks set to 3; k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
31223
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // ks = 3 for every (m, n) with m in 1..2 and n in 1..4;
  // k takes 1, 10, 19, 28, 37 and iterations is set to 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 with m = 2 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31262
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 8 and 12 with m = 2 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31280
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // cm_stride = 7 for every (m, n) with m in 1..2 and n in 1..4;
  // k takes 1, 10, 19, 28, 37 and iterations is set to 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Fixed m = 2, n = 4, ks = 3 with a_offset set to 83;
  // k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
31318
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Same setup as a_offset (ks = 3, a_offset = 83), repeated with zero_index
  // set to each of 0 and 1; k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(depth);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31338
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 run with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
31352
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 run with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
31366
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 run with cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
31380 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381
31382
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with m = mr = 3, n = nr = 4 and k = kr = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
31396
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Single m = 3, n = 4, k = 8 run with cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
31410
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k fixed at 8; every (m, n) with n in 1..4 and m in 1..3,
  // iterations set to 1.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31428
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // n = 4 and k = 8 fixed; m covers 1..3, iterations set to 1.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31444
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // m = 3 and k = 8 fixed; n covers 1..4, iterations set to 1.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31460
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 3, n = 4 fixed; k covers 1..7 (below kr = 8).
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31475
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k covers 1..7; for each, every (m, n) with n in 1..4 and m in 1..3,
  // iterations set to 1.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 3, n = 4 fixed; k covers 9..15 (above kr = 8, below 2 * kr).
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31510
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k covers 9..15; for each, every (m, n) with n in 1..4 and m in 1..3,
  // iterations set to 1.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 3, n = 4 fixed; k steps through the multiples of 8 from 16 to 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31545
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k steps through the multiples of 8 from 16 to 80; for each, every (m, n)
  // with n in 1..4 and m in 1..3, iterations set to 1.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n runs over 5..7 (above nr = 4, below 2 * nr) with m fixed at 3;
  // k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31582
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7, k in {1, 10, 19, 28, 37}, m = 3; cn_stride is set to 7.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31600
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7, k in {1, 10, 19, 28, 37}, and every m from 1 up to mr = 3;
  // iterations is set to 1 for each combination.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12 (multiples of nr = 4) with m fixed at 3;
  // k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31637
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12, k takes 1, 10, 19, 28, 37, m = 3; cn_stride is set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31655
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12, k takes 1, 10, 19, 28, 37, and m covers 1..3;
  // iterations is set to 1 for each combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Fixed m = 3, n = 4 with ks set to 3; k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31691
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3 for every (m, n) with m in 1..3 and n in 1..4;
  // k takes 1, 10, 19, 28, 37 and iterations is set to 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 3 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31730
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n takes 8 and 12 with m = 3 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31748
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // cm_stride = 7 for every (m, n) with m in 1..3 and n in 1..4;
  // k takes 1, 10, 19, 28, 37 and iterations is set to 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Fixed m = 3, n = 4, ks = 3 with a_offset set to 127;
  // k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
31786
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Same setup as a_offset (ks = 3, a_offset = 127), repeated with zero_index
  // set to each of 0, 1 and 2; k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
31806
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Single m = 3, n = 4, k = 8 run with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
31820
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Single m = 3, n = 4, k = 8 run with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
31834
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Single m = 3, n = 4, k = 8 run with cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
31848 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849
31850
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with m = mr = 1, n = nr = 8 and k = kr = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
31864
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // Single m = 1, n = 8, k = 8 run with cn_stride set to 11.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
31878
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k fixed at 8; n covers 1..8 and the m loop visits only m = 1 (mr = 1);
  // iterations set to 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
31896
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  // n = 8 and k = 8 fixed; the m loop visits only m = 1 (mr = 1),
  // iterations set to 1.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
31912
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  // m = 1 and k = 8 fixed; n covers 1..8, iterations set to 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
31928
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m = 1, n = 8 fixed; k covers 1..7 (below kr = 8).
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
31943
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k covers 1..7; for each, n covers 1..8 and the m loop visits only m = 1;
  // iterations set to 1.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m = 1, n = 8 fixed; k covers 9..15 (above kr = 8, below 2 * kr).
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
31978
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k covers 9..15; for each, n covers 1..8 and the m loop visits only m = 1;
  // iterations set to 1.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // m = 1, n = 8 fixed; k steps through the multiples of 8 from 16 to 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32013
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k steps through the multiples of 8 from 16 to 80; for each, n covers 1..8
  // and the m loop visits only m = 1; iterations set to 1.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // n runs over 9..15 (above nr = 8, below 2 * nr) with m fixed at 1;
  // k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32050
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m = 1; cn_stride is set to 11.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32068
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15, k in {1, 10, 19, 28, 37}; the m loop visits only m = 1;
  // iterations is set to 1 for each combination.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // n takes 16 and 24 (multiples of nr = 8) with m fixed at 1;
  // k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32105
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // n takes 16 and 24, k takes 1, 10, 19, 28, 37, m = 1;
  // cn_stride is set to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32123
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n takes 16 and 24, k takes 1, 10, 19, 28, 37; the m loop visits only
  // m = 1; iterations is set to 1 for each combination.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // Fixed m = 1, n = 8 with ks set to 3; k takes 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32159
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // ks = 3 with n covering 1..8 (the m loop visits only m = 1);
  // k takes 1, 10, 19, 28, 37 and iterations is set to 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15 with m = 1 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32198
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // n takes 16 and 24 with m = 1 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32216
// 1x8c8 AVX2 IGEMM test with cm_stride(11): k = 1, 10, 19, 28, 37; n = 1..8; m = 1.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237
// 1x8c8 AVX2 IGEMM test with ks(3) and a_offset(43): k = 1, 10, 19, 28, 37; m = 1; n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32254
// 1x8c8 AVX2 IGEMM test with ks(3), a_offset(43) and zero_index(0):
// k = 1, 10, 19, 28, 37; m = 1; n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k_size);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32274
// 1x8c8 AVX2 IGEMM test at m = 1, n = 8, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32288
// 1x8c8 AVX2 IGEMM test at m = 1, n = 8, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32302
// 1x8c8 AVX2 IGEMM test at m = 1, n = 8, k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32316 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317
32318
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32332
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8, k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32346
// 3x8c8 AVX2 IGEMM test at k = 8 for every n = 1..8 and m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32364
// 3x8c8 AVX2 IGEMM test at n = 8, k = 8 for m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32380
// 3x8c8 AVX2 IGEMM test at m = 3, k = 8 for n = 1..8, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32396
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8 for k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32411
// 3x8c8 AVX2 IGEMM test for k = 1..7, n = 1..8, m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8 for k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32446
// 3x8c8 AVX2 IGEMM test for k = 9..15, n = 1..8, m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32481
// 3x8c8 AVX2 IGEMM test for k = 16, 24, ..., 80, n = 1..8, m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501
// 3x8c8 AVX2 IGEMM test at m = 3 for n = 9..15 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32518
// 3x8c8 AVX2 IGEMM test with cn_stride(11) at m = 3 for n = 9..15 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32536
// 3x8c8 AVX2 IGEMM test for n = 9..15, k = 1, 10, 19, 28, 37, m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556
// 3x8c8 AVX2 IGEMM test at m = 3 for n = 16, 24 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32573
// 3x8c8 AVX2 IGEMM test with cn_stride(11) at m = 3 for n = 16, 24 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32591
// 3x8c8 AVX2 IGEMM test for n = 16, 24, k = 1, 10, 19, 28, 37, m = 1..3, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611
// 3x8c8 AVX2 IGEMM test with ks(3) at m = 3, n = 8 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32627
// 3x8c8 AVX2 IGEMM test with ks(3): k = 1, 10, 19, 28, 37; n = 1..8; m = 1..3.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648
// 3x8c8 AVX2 IGEMM test with ks(3) at m = 3 for n = 9..15 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32666
// 3x8c8 AVX2 IGEMM test with ks(3) at m = 3 for n = 16, 24 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32684
// 3x8c8 AVX2 IGEMM test with cm_stride(11): k = 1, 10, 19, 28, 37; n = 1..8; m = 1..3.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705
// 3x8c8 AVX2 IGEMM test with ks(3) and a_offset(127): k = 1, 10, 19, 28, 37; m = 3; n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
32722
// 3x8c8 AVX2 IGEMM test with ks(3), a_offset(127) and zero_index = 0..2:
// k = 1, 10, 19, 28, 37; m = 3; n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(8).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32742
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32756
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32770
// 3x8c8 AVX2 IGEMM test at m = 3, n = 8, k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
32784 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785
32786
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
32800
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16, k = 8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
32814
// 2x16c8 AVX512SKX IGEMM test at k = 8 for every n = 1..16 and m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32832
// 2x16c8 AVX512SKX IGEMM test at n = 16, k = 8 for m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
32848
// 2x16c8 AVX512SKX IGEMM test at m = 2, k = 8 for n = 1..16, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
32864
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16 for k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
32879
// 2x16c8 AVX512SKX IGEMM test for k = 1..7, n = 1..16, m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16 for k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
32914
// 2x16c8 AVX512SKX IGEMM test for k = 9..15, n = 1..16, m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
32949
// 2x16c8 AVX512SKX IGEMM test for k = 16, 24, ..., 80, n = 1..16, m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969
// 2x16c8 AVX512SKX IGEMM test at m = 2 for n = 17..31 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32986
// 2x16c8 AVX512SKX IGEMM test with cn_stride(19) at m = 2 for n = 17..31 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33004
// 2x16c8 AVX512SKX IGEMM test for n = 17..31, k = 1, 10, 19, 28, 37, m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024
// 2x16c8 AVX512SKX IGEMM test at m = 2 for n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33041
// 2x16c8 AVX512SKX IGEMM test with cn_stride(19) at m = 2 for n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33059
// 2x16c8 AVX512SKX IGEMM test for n = 32, 48, k = 1, 10, 19, 28, 37, m = 1..2, iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079
// 2x16c8 AVX512SKX IGEMM test with ks(3) at m = 2, n = 16 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
33095
// 2x16c8 AVX512SKX IGEMM test with ks(3): k = 1, 10, 19, 28, 37; n = 1..16; m = 1..2.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116
// 2x16c8 AVX512SKX IGEMM test with ks(3) at m = 2 for n = 17..31 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33134
// 2x16c8 AVX512SKX IGEMM test with ks(3) at m = 2 for n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33152
// 2x16c8 AVX512SKX IGEMM test with cm_stride(19): k = 1, 10, 19, 28, 37; n = 1..16; m = 1..2.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173
// 2x16c8 AVX512SKX IGEMM test with ks(3) and a_offset(83): k = 1, 10, 19, 28, 37; m = 2; n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
33190
// 2x16c8 AVX512SKX IGEMM test with ks(3), a_offset(83) and zero_index = 0..1:
// k = 1, 10, 19, 28, 37; m = 2; n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(16).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33210
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
33224
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
33238
// 2x16c8 AVX512SKX IGEMM test at m = 2, n = 16, k = 8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
33252 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253
33254
33255 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c8 WAsm SIMD (dot16x2, ld64) IGEMM test at m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33267
// 1x4c8 WAsm SIMD (dot16x2, ld64) IGEMM test at m = 1, n = 4, k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33280
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // Sweeps n over [1, 4] and m over [1, 1] at k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33297
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // Sweeps m over [1, 1] with n = 4 and k = 8, one iteration per shape.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33312
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // Sweeps n over [1, 4] with m = 1 and k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33327
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Full 1x4 tile for every k in [1, 7].
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33341
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // For every k in [1, 7]: sweeps n over [1, 4] and m over [1, 1], one iteration per shape.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33360
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Full 1x4 tile for every k in [9, 15].
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33374
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // For every k in [9, 15]: sweeps n over [1, 4] and m over [1, 1], one iteration per shape.
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33393
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Full 1x4 tile for k = 16, 24, ..., 80.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33407
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // For k = 16, 24, ..., 80: sweeps n over [1, 4] and m over [1, 1], one iteration per shape.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33426
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n in [5, 7] with m = 1; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33442
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n in [5, 7] with m = 1 and cn_stride = 7; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33459
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n in [5, 7], k from 1 up to 40 in steps of 9, m in [1, 1]; one iteration per shape.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33478
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n = 8 and 12 with m = 1; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33494
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n = 8 and 12 with m = 1 and cn_stride = 7; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33511
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n = 8 and 12, k from 1 up to 40 in steps of 9, m in [1, 1]; one iteration per shape.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33530
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Full 1x4 tile with ks = 3; k runs from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33545
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // ks = 3; k from 1 up to 40 in steps of 9, n in [1, 4], m in [1, 1]; one iteration per shape.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33565
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n in [5, 7] with m = 1 and ks = 3; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33582
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n = 8 and 12 with m = 1 and ks = 3; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33599
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // cm_stride = 7; k from 1 up to 40 in steps of 9, n in [1, 4], m in [1, 1]; one iteration per shape.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33619
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Full 1x4 tile with ks = 3 and a_offset = 43; k runs from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33635
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  // Full 1x4 tile with ks = 3 and a_offset = 43; zero_index takes each mz in [0, 0]
  // for k from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(k).ks(3).a_offset(43).zero_index(mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33654
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  // Full 1x4 tile at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33667
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  // Full 1x4 tile at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33680
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Full 1x4 tile at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33693 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33694
33695
33696 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Full 2x4 tile at k = 8.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33708
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Full 2x4 tile at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33721
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Sweeps n over [1, 4] and m over [1, 2] at k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33738
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Sweeps m over [1, 2] with n = 4 and k = 8, one iteration per shape.
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33753
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Sweeps n over [1, 4] with m = 2 and k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33768
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 2x4 tile for every k in [1, 7].
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33782
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // For every k in [1, 7]: sweeps n over [1, 4] and m over [1, 2], one iteration per shape.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33801
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 2x4 tile for every k in [9, 15].
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33815
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // For every k in [9, 15]: sweeps n over [1, 4] and m over [1, 2], one iteration per shape.
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33834
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Full 2x4 tile for k = 16, 24, ..., 80.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33848
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // For k = 16, 24, ..., 80: sweeps n over [1, 4] and m over [1, 2], one iteration per shape.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33867
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n in [5, 7] with m = 2; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33883
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n in [5, 7] with m = 2 and cn_stride = 7; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33900
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in [5, 7], k from 1 up to 40 in steps of 9, m in [1, 2]; one iteration per shape.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33919
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n = 8 and 12 with m = 2; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33935
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n = 8 and 12 with m = 2 and cn_stride = 7; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33952
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8 and 12, k from 1 up to 40 in steps of 9, m in [1, 2]; one iteration per shape.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33971
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full 2x4 tile with ks = 3; k runs from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33986
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks = 3; k from 1 up to 40 in steps of 9, n in [1, 4], m in [1, 2]; one iteration per shape.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34006
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n in [5, 7] with m = 2 and ks = 3; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34023
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n = 8 and 12 with m = 2 and ks = 3; k runs from 1 up to 40 in steps of 9.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34040
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // cm_stride = 7; k from 1 up to 40 in steps of 9, n in [1, 4], m in [1, 2]; one iteration per shape.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34060
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; k runs from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k).ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34076
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; zero_index takes each mz in [0, 1]
  // for k from 1 up to 40 in steps of 9.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k).ks(3).a_offset(83).zero_index(mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34095
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Full 2x4 tile at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34108
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Full 2x4 tile at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34121
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Full 2x4 tile at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34134 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34135
34136
34137 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Full 3x4 tile at k = 8.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34149
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Full 3x4 tile at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34162
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Sweeps n over [1, 4] and m over [1, 3] at k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34179
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Sweeps m over [1, 3] with n = 4 and k = 8, one iteration per shape.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34194
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Sweeps n over [1, 4] with m = 3 and k = 8, one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34209
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 3x4 tile for every k in [1, 7].
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34223
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // For every k in [1, 7]: sweeps n over [1, 4] and m over [1, 3], one iteration per shape.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34242
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 3x4 tile for every k in [9, 15].
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34256
// k > 8: for each k in [9, 15], sweeps n over [1, 4] and m over [1, 3],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34275
// k divisible by 8: runs k = 16, 24, ..., 80 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34289
// k divisible by 8: for k = 16, 24, ..., 80, sweeps n over [1, 4] and
// m over [1, 3], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34308
// n > 4: for each n in [5, 7], runs k = 1, 10, ..., 37 with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34324
// n > 4 with cn_stride = 7: for each n in [5, 7], runs k = 1, 10, ..., 37
// with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34341
// n > 4: for each n in [5, 7] and k = 1, 10, ..., 37, sweeps m over [1, 3],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34360
// n divisible by 4: for n = 8 and 12, runs k = 1, 10, ..., 37 with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34376
// n divisible by 4 with cn_stride = 7: for n = 8 and 12, runs
// k = 1, 10, ..., 37 with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34393
// n divisible by 4: for n = 8 and 12 and k = 1, 10, ..., 37, sweeps m over
// [1, 3], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34412
// ks = 3: runs k = 1, 10, ..., 37 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34427
// ks = 3: for k = 1, 10, ..., 37, sweeps n over [1, 4] and m over [1, 3],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34447
// n > 4 with ks = 3: for each n in [5, 7], runs k = 1, 10, ..., 37 with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34464
// n divisible by 4 with ks = 3: for n = 8 and 12, runs k = 1, 10, ..., 37
// with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n_size)
        .k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34481
// cm_stride = 7: for k = 1, 10, ..., 37, sweeps n over [1, 4] and m over
// [1, 3], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34501
// a_offset = 127 with ks = 3: runs k = 1, 10, ..., 37 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34517
// zero_index sweep: for k = 1, 10, ..., 37, sets zero_index to each value in
// [0, 2] with ks = 3, a_offset = 127, m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34536
// qmin = 128: single run with m = 3, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34549
// qmax = 128: single run with m = 3, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34562
// cm_stride = 7: single run with m = 3, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34575 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34576
34577
34578 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k == 8: single run with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34590
// cn_stride = 7: single run with m = 4, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34603
// k == 8: sweeps n over [1, 4] and m over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m_size)
        .n(n_size)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34620
// k == 8 with n fixed at 4: sweeps m over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m_size)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34635
// k == 8 with m fixed at 4: sweeps n over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(n_size)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34650
// k < 8: runs every k in [1, 7] with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34664
// k < 8: for each k in [1, 7], sweeps n over [1, 4] and m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34683
// k > 8: runs every k in [9, 15] with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34697
// k > 8: for each k in [9, 15], sweeps n over [1, 4] and m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34716
// k divisible by 8: runs k = 16, 24, ..., 80 with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34730
// k divisible by 8: for k = 16, 24, ..., 80, sweeps n over [1, 4] and
// m over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34749
// n > 4: for each n in [5, 7], runs k = 1, 10, ..., 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34765
// n > 4 with cn_stride = 7: for each n in [5, 7], runs k = 1, 10, ..., 37
// with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34782
// n > 4: for each n in [5, 7] and k = 1, 10, ..., 37, sweeps m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34801
// n divisible by 4: for n = 8 and 12, runs k = 1, 10, ..., 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34817
// n divisible by 4 with cn_stride = 7: for n = 8 and 12, runs
// k = 1, 10, ..., 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34834
// n divisible by 4: for n = 8 and 12 and k = 1, 10, ..., 37, sweeps m over
// [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34853
// ks = 3: runs k = 1, 10, ..., 37 with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34868
// ks = 3: for k = 1, 10, ..., 37, sweeps n over [1, 4] and m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34888
// n > 4 with ks = 3: for each n in [5, 7], runs k = 1, 10, ..., 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34905
// n divisible by 4 with ks = 3: for n = 8 and 12, runs k = 1, 10, ..., 37
// with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34922
// cm_stride = 7: for k = 1, 10, ..., 37, sweeps n over [1, 4] and m over
// [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34942
// a_offset = 163 with ks = 3: runs k = 1, 10, ..., 37 with m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34958
// zero_index sweep: for k = 1, 10, ..., 37, sets zero_index to each value in
// [0, 3] with ks = 3, a_offset = 163, m = 4 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34977
// qmin = 128: single run with m = 4, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
34990
// qmax = 128: single run with m = 4, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35003
// cm_stride = 7: single run with m = 4, n = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35016 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35017
35018
35019 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k == 1: single run with m = 4 and n = 2.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
35031
// cn_stride = 5: single run with m = 4, n = 2 and k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
35044
// k == 1: sweeps n over [1, 2] and m over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m_size)
        .n(n_size)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35061
// k == 1 with n fixed at 2: sweeps m over [1, 4], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m_size)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
35076
// k == 1 with m fixed at 4: sweeps n over [1, 2], one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n_size)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
35091
// k > 1: runs every k in [2, 9] with m = 4 and n = 2.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
35105
// k > 1: for each k in [2, 9], sweeps n over [1, 2] and m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35124
// n > 2: the n loop covers only n = 3; runs k = 1, 3, 5 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35140
// n > 2 with cn_stride = 5: the n loop covers only n = 3; runs k = 1, 3, 5
// with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35157
// n > 2: the n loop covers only n = 3; for k = 1, 3, 5, sweeps m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35176
// n divisible by 2: for n = 4 and 6, runs k = 1, 3, 5 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35192
// n divisible by 2 with cn_stride = 5: for n = 4 and 6, runs k = 1, 3, 5
// with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n_size)
        .k(k_size)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35209
// n divisible by 2: for n = 4 and 6 and k = 1, 3, 5, sweeps m over [1, 4],
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m_size)
          .n(n_size)
          .k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35228
// ks = 3: runs k = 1, 3, 5 with m = 4 and n = 2.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
35243
// ks=3 with k in {1, 3, 5}, n in 1..2 and m in 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35263
// ks=3 with n=3 (the only value in the loop range) and k in {1, 3, 5}, m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35280
// ks=3 with n in {4, 6} and k in {1, 3, 5}, m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35297
// cm_stride=5 with k in {1, 3, 5}, n in 1..2 and m in 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35317
// Full 4x2 output with ks=3 and a_offset=23, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .ks(3)
        .a_offset(23)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35333
// ks=3 and a_offset=23 with zero_index swept over 0..3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_dim)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35352
// Single run at m=4, n=2, k=1 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35365
// Single run at m=4, n=2, k=1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35378
// Single run at m=4, n=2, k=1 with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35391 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35392
35393
// Single run of the 1x2 scalar_lrintf kernel at m=1, n=2, k=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35405
// Single run at m=1, n=2, k=1 with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35418
// k=1 with n in 1..2 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35435
// k=1, n=2, with m looped over its only value (1); one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35450
// k=1, m=1, with n in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35465
// m=1, n=2, with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35479
// k in 2..9 with n in 1..2 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35498
// m=1 with n=3 (the only value in the loop range) and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35514
// cn_stride=5 with n=3 (the only value in the loop range) and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35531
// n=3 and m=1 (both single-value loops) with k in {1, 3, 5}; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35550
// m=1 with n in {4, 6} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35566
// cn_stride=5 with n in {4, 6} and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35583
// n in {4, 6} and k in {1, 3, 5} with m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35602
// m=1, n=2 with ks=3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35617
// ks=3 with k in {1, 3, 5}, n in 1..2 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35637
// ks=3 with n=3 (the only value in the loop range) and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35654
// ks=3 with n in {4, 6} and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35671
// cm_stride=5 with k in {1, 3, 5}, n in 1..2 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35691
// m=1, n=2 with ks=3 and a_offset=7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35707
// ks=3 and a_offset=7 with zero_index=0 (single-value loop), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(2).k(k_dim)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35726
// Single run at m=1, n=2, k=1 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35739
// Single run at m=1, n=2, k=1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35752
// Single run at m=1, n=2, k=1 with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35765
35766
// Single run of the 1x4 scalar_imagic kernel at m=1, n=4, k=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
35778
// Single run at m=1, n=4, k=1 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
35791
// k=1 with n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35808
// k=1, n=4, with m looped over its only value (1); one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35823
// k=1, m=1, with n in 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35838
// m=1, n=4, with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35852
// k in 2..9 with n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35871
// m=1 with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35887
// cn_stride=7 with n in 5..7 and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35904
// n in 5..7 and k in {1, 3, 5} with m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35923
// m=1 with n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35939
// cn_stride=7 with n in {8, 12} and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35956
// n in {8, 12} and k in {1, 3, 5} with m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35975
// m=1, n=4 with ks=3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35990
// ks=3 with k in {1, 3, 5}, n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36010
// ks=3 with n in 5..7 and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36027
// ks=3 with n in {8, 12} and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36044
// cm_stride=7 with k in {1, 3, 5}, n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36064
// m=1, n=4 with ks=3 and a_offset=7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
36080
// ks=3 and a_offset=7 with zero_index=0 (single-value loop), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36099
// Single run at m=1, n=4, k=1 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
36112
// Single run at m=1, n=4, k=1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
36125
// Single run at m=1, n=4, k=1 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
36138
36139
// Single run of the 1x4 scalar_lrintf kernel at m=1, n=4, k=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
36151
// Single run at m=1, n=4, k=1 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
36164
// k=1 with n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36181
// k=1, n=4, with m looped over its only value (1); one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
36196
// k=1, m=1, with n in 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
36211
// m=1, n=4, with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
36225
// k in 2..9 with n in 1..4 and m=1 (single-value loop); one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
36244
// m=1 with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36260
// cn_stride=7 with n in 5..7 and k in {1, 3, 5}, m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
36277
// Runs the 1x4 scalar lrintf microkernel with n in 5..7 and k in {1, 3, 5}.
// The m loop makes a single pass (m=1); each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36296
// Runs the 1x4 scalar lrintf microkernel with n in {8, 12} (multiples of nr=4)
// and k in {1, 3, 5}, keeping m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36312
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 3, 5}, m=1) with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36329
// Runs the 1x4 scalar lrintf microkernel with n in {8, 12} and k in {1, 3, 5}.
// The m loop makes a single pass (m=1); each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36348
// Runs the 1x4 scalar lrintf microkernel on a full tile (m=1, n=4) with ks set to 3,
// for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36363
// Runs the 1x4 scalar lrintf microkernel with ks set to 3 for k in {1, 3, 5} and n in 1..4.
// The m loop makes a single pass (m=1); each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36383
// Runs the 1x4 scalar lrintf microkernel with ks set to 3, n in 5..7 and k in {1, 3, 5} (m=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36400
// Runs the 1x4 scalar lrintf microkernel with ks set to 3, n in {8, 12} and k in {1, 3, 5} (m=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36417
// Runs the 1x4 scalar lrintf microkernel with cm_stride set to 7 for k in {1, 3, 5} and n in 1..4.
// The m loop makes a single pass (m=1); each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36437
// Runs the 1x4 scalar lrintf microkernel on a full tile (m=1, n=4) with ks set to 3
// and a_offset set to 7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36453
// Runs the 1x4 scalar lrintf microkernel with ks set to 3 and a_offset set to 7 for k in {1, 3, 5},
// passing each mz below mr to zero_index (only mz=0 here, since mr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36472
// Runs the 1x4 scalar lrintf microkernel on a full tile (m=1, n=4, k=1) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36485
// Runs the 1x4 scalar lrintf microkernel on a full tile (m=1, n=4, k=1) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36498
// Runs the 1x4 scalar lrintf microkernel on a full tile (m=1, n=4, k=1) with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36511
36512
// Runs the 2x2 scalar fmagic QC8 IGEMM microkernel through GemmMicrokernelTester
// on a full tile (m=2, n=2) with k=1.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36524
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2, k=1) with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36537
// Runs the 2x2 scalar fmagic microkernel with k=1 for every (m, n) in 1..2 x 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36554
// Runs the 2x2 scalar fmagic microkernel with k=1 and n=2 for m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36569
// Runs the 2x2 scalar fmagic microkernel with k=1 and m=2 for n in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36584
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2) for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36598
// Runs the 2x2 scalar fmagic microkernel for every k in 2..9 and every (m, n) in 1..2 x 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36617
// Runs the 2x2 scalar fmagic microkernel with n=3 (the n loop covers only this value,
// larger than nr=2) and k in {1, 3, 5}, keeping m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36633
// Same sweep as n_gt_2 (n=3, k in {1, 3, 5}, m=2) with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36650
// Runs the 2x2 scalar fmagic microkernel with n=3, k in {1, 3, 5} and m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36669
// Runs the 2x2 scalar fmagic microkernel with n in {4, 6} (multiples of nr=2)
// and k in {1, 3, 5}, keeping m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36685
// Same sweep as n_div_2 (n in {4, 6}, k in {1, 3, 5}, m=2) with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36702
// Runs the 2x2 scalar fmagic microkernel with n in {4, 6}, k in {1, 3, 5} and m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36721
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2) with ks set to 3,
// for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36736
// Runs the 2x2 scalar fmagic microkernel with ks set to 3 for k in {1, 3, 5}
// and every (m, n) in 1..2 x 1..2; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36756
// Runs the 2x2 scalar fmagic microkernel with ks set to 3, n=3 and k in {1, 3, 5} (m=2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36773
// Runs the 2x2 scalar fmagic microkernel with ks set to 3, n in {4, 6} and k in {1, 3, 5} (m=2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36790
// Runs the 2x2 scalar fmagic microkernel with cm_stride set to 5 for k in {1, 3, 5}
// and every (m, n) in 1..2 x 1..2; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36810
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2) with ks set to 3
// and a_offset set to 13, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36826
// Runs the 2x2 scalar fmagic microkernel with ks set to 3 and a_offset set to 13 for k in {1, 3, 5},
// passing each mz in 0..1 (below mr=2) to zero_index.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36845
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2, k=1) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36858
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2, k=1) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36871
// Runs the 2x2 scalar fmagic microkernel on a full tile (m=2, n=2, k=1) with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36884
36885
// Runs the 2x4 scalar imagic QC8 IGEMM microkernel through GemmMicrokernelTester
// on a full tile (m=2, n=4) with k=1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
36897
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4, k=1) with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
36910
// Runs the 2x4 scalar imagic microkernel with k=1 for every (m, n) in 1..2 x 1..4;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36927
// Runs the 2x4 scalar imagic microkernel with k=1 and n=4 for m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
36942
// Runs the 2x4 scalar imagic microkernel with k=1 and m=2 for n in 1..4;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
36957
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4) for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
36971
// Runs the 2x4 scalar imagic microkernel for every k in 2..9 and every (m, n) in 1..2 x 1..4;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36990
// Runs the 2x4 scalar imagic microkernel with n in 5..7 (larger than nr=4)
// and k in {1, 3, 5}, keeping m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37006
// Same sweep as n_gt_4 (n in 5..7, k in {1, 3, 5}, m=2) with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37023
// Runs the 2x4 scalar imagic microkernel with n in 5..7, k in {1, 3, 5} and m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37042
// Runs the 2x4 scalar imagic microkernel with n in {8, 12} (multiples of nr=4)
// and k in {1, 3, 5}, keeping m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37058
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 3, 5}, m=2) with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37075
// Runs the 2x4 scalar imagic microkernel with n in {8, 12}, k in {1, 3, 5} and m in 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37094
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4) with ks set to 3,
// for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37109
// Runs the 2x4 scalar imagic microkernel with ks set to 3 for k in {1, 3, 5}
// and every (m, n) in 1..2 x 1..4; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37129
// Runs the 2x4 scalar imagic microkernel with ks set to 3, n in 5..7 and k in {1, 3, 5} (m=2).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37146
// Runs the 2x4 scalar imagic microkernel with ks set to 3, n in {8, 12} and k in {1, 3, 5} (m=2).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37163
// Runs the 2x4 scalar imagic microkernel with cm_stride set to 7 for k in {1, 3, 5}
// and every (m, n) in 1..2 x 1..4; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37183
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4) with ks set to 3
// and a_offset set to 13, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37199
// Runs the 2x4 scalar imagic microkernel with ks set to 3 and a_offset set to 13 for k in {1, 3, 5},
// passing each mz in 0..1 (below mr=2) to zero_index.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37218
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4, k=1) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37231
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4, k=1) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37244
// Runs the 2x4 scalar imagic microkernel on a full tile (m=2, n=4, k=1) with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37257
37258
// Runs the 3x2 scalar fmagic QC8 IGEMM microkernel through GemmMicrokernelTester
// on a full tile (m=3, n=2) with k=1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37270
// Runs the 3x2 scalar fmagic microkernel on a full tile (m=3, n=2, k=1) with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37283
// Runs the 3x2 scalar fmagic microkernel with k=1 for every (m, n) in 1..3 x 1..2;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37300
// Runs the 3x2 scalar fmagic microkernel with k=1 and n=2 for m in 1..3;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37315
// k = 1 and m = 3 while n sweeps [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(cols).k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37330
// Full 3x2 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37344
// Each k in [2, 9] combined with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37363
// n = 3 (the loop covers only that value) with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37379
// n = 3 with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37396
// n = 3 with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37415
// n in {4, 6} with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37431
// n in {4, 6} with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37448
// n in {4, 6} with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37467
// Full 3x2 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37482
// ks = 3 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37502
// n = 3 with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37519
// n in {4, 6} with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37536
// cm_stride set to 5 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37556
// Full 3x2 tile with ks = 3 and a_offset = 17 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37572
// Full 3x2 tile with ks = 3 and a_offset = 17, for k in {1, 3, 5} and zero_index in {0, 1, 2}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(depth)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37591
// Full 3x2 tile with k = 1 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37604
// Full 3x2 tile with k = 1 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37617
// Full 3x2 tile with k = 1 and cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37630
37631
// Runs the kernel once on a full 3x2 tile (m = 3, n = 2) with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37643
// Full 3x2 tile with k = 1 and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37656
// k = 1 for every (m, n) with m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37673
// k = 1 and n = 2 while m sweeps [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37688
// k = 1 and m = 3 while n sweeps [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(cols).k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37703
// Full 3x2 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37717
// Each k in [2, 9] combined with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37736
// n = 3 (the loop covers only that value) with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37752
// n = 3 with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37769
// n = 3 with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37788
// n in {4, 6} with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37804
// n in {4, 6} with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37821
// n in {4, 6} with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37840
// Full 3x2 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37855
// ks = 3 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37875
// n = 3 with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37892
// n in {4, 6} with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37909
// cm_stride set to 5 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37929
// Full 3x2 tile with ks = 3 and a_offset = 17 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37945
// Full 3x2 tile with ks = 3 and a_offset = 17, for k in {1, 3, 5} and zero_index in {0, 1, 2}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(depth)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37964
// Full 3x2 tile with k = 1 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37977
// Full 3x2 tile with k = 1 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37990
// Full 3x2 tile with k = 1 and cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
38003
38004
// Runs the kernel once on a full 3x2 tile (m = 3, n = 2) with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38016
// Full 3x2 tile with k = 1 and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38029
// k = 1 for every (m, n) with m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38046
// k = 1 and n = 2 while m sweeps [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38061
// k = 1 and m = 3 while n sweeps [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(cols).k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38076
// Full 3x2 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38090
// Each k in [2, 9] combined with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38109
// n = 3 (the loop covers only that value) with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38125
// n = 3 with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38142
// n = 3 with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38161
// n in {4, 6} with m = 3 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38177
// n in {4, 6} with m = 3, k in {1, 3, 5}, and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38194
// n in {4, 6} with k in {1, 3, 5} and m sweeping [1, 3]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38213
// Full 3x2 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38228
// ks = 3 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38248
// n = 3 with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38265
// n in {4, 6} with m = 3, ks = 3, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38282
// cm_stride set to 5 for k in {1, 3, 5} with every m in [1, 3] and n in [1, 2]; iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38302
// Full 3x2 tile with ks = 3 and a_offset = 17 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(depth)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38318
// Full 3x2 tile with ks = 3 and a_offset = 17, for k in {1, 3, 5} and zero_index in {0, 1, 2}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(depth)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38337
// Full 3x2 tile with k = 1 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38350
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  // Full 3x2 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38363
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  // Full 3x2 tile at k = 1 with cm_stride(5).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38376
38377
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  // Full 4x2 tile at k = 1.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38389
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  // Full 4x2 tile at k = 1 with cn_stride(5).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38402
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  // Every m x n sub-tile of the 4x2 tile at k = 1; one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38419
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // m swept over 1..4 with n = 2 and k = 1; one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38434
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // n swept over 1..2 with m = 4 and k = 1; one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38449
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  // Full 4x2 tile for every k in 2..9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38463
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  // Every m x n sub-tile of the 4x2 tile for every k in 2..9; one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38482
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  // n = 3 (the only value in [3, 4)) with m = 4, for k in {1, 3, 5}.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38498
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  // n = 3 (the only value in [3, 4)) with m = 4 and cn_stride(5), for k in {1, 3, 5}.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38515
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  // n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m swept over 1..4;
  // one iteration each.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38534
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  // n in {4, 6} with m = 4, for k in {1, 3, 5}.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38550
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  // n in {4, 6} with m = 4 and cn_stride(5), for k in {1, 3, 5}.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38567
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m swept over 1..4; one iteration each.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38586
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  // Full 4x2 tile for k in {1, 3, 5} with ks(3).
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38601
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  // Every m x n sub-tile of the 4x2 tile for k in {1, 3, 5} with ks(3);
  // one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38621
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  // n = 3 (the only value in [3, 4)) with m = 4 and ks(3), for k in {1, 3, 5}.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38638
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  // n in {4, 6} with m = 4 and ks(3), for k in {1, 3, 5}.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38655
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  // Every m x n sub-tile of the 4x2 tile for k in {1, 3, 5}, with cm_stride(5);
  // one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38675
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  // Full 4x2 tile for k in {1, 3, 5} with ks(3) and a_offset(23).
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38691
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  // Full 4x2 tile for k in {1, 3, 5} with ks(3) and a_offset(23), trying each
  // zero_index value 0..3 in turn.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(kc);
      tester.ks(3).a_offset(23).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38710
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  // Full 4x2 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38723
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  // Full 4x2 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38736
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  // Full 4x2 tile at k = 1 with cm_stride(5).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38749
38750
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  // Full 4x4 tile at k = 1.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38762
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  // Full 4x4 tile at k = 1 with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38775
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Every m x n sub-tile of the 4x4 tile at k = 1; one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38792
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // m swept over 1..4 with n = 4 and k = 1; one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(mc).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38807
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // n swept over 1..4 with m = 4 and k = 1; one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38822
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  // Full 4x4 tile for every k in 2..9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38836
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Every m x n sub-tile of the 4x4 tile for every k in 2..9; one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38855
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  // n in 5..7 with m = 4, for k in {1, 3, 5}.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38871
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  // n in 5..7 with m = 4 and cn_stride(7), for k in {1, 3, 5}.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38888
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  // n in 5..7, k in {1, 3, 5}, m swept over 1..4; one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38907
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  // n in {8, 12} with m = 4, for k in {1, 3, 5}.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38923
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  // n in {8, 12} with m = 4 and cn_stride(7), for k in {1, 3, 5}.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38940
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m swept over 1..4; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38959
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  // Full 4x4 tile for k in {1, 3, 5} with ks(3).
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38974
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  // Every m x n sub-tile of the 4x4 tile for k in {1, 3, 5} with ks(3);
  // one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38994
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  // n in 5..7 with m = 4 and ks(3), for k in {1, 3, 5}.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39011
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  // n in {8, 12} with m = 4 and ks(3), for k in {1, 3, 5}.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39028
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  // Every m x n sub-tile of the 4x4 tile for k in {1, 3, 5}, with cm_stride(7);
  // one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39048
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  // Full 4x4 tile for k in {1, 3, 5} with ks(3) and a_offset(23).
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
39064
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  // Full 4x4 tile for k in {1, 3, 5} with ks(3) and a_offset(23), trying each
  // zero_index value 0..3 in turn.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3).a_offset(23).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39083
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  // Full 4x4 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
39096
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  // Full 4x4 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
39109
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  // Full 4x4 tile at k = 1 with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
39122
39123
39124 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile at k = 8, using the generator entry point.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
39137
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile at k = 8 with cn_stride(11).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
39151
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every m x n sub-tile of the 4x8 tile at k = 8; one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39169
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m swept over 1..4 with n = 8 and k = 8; one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
39185
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n swept over 1..8 with m = 4 and k = 8; one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
39201
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile for every k in 1..7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
39216
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every m x n sub-tile of the 4x8 tile for every k in 1..7; one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39236
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile for every k in 9..15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
39251
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every m x n sub-tile of the 4x8 tile for every k in 9..15; one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39271
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
39286
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every m x n sub-tile of the 4x8 tile for k = 16, 24, ..., 80;
  // one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39306
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 with m = 4, for k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39323
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 with m = 4 and cn_stride(11), for k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39341
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m swept over 1..4; one iteration each.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39361
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m fixed at 4.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39378
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m = 4 and
// cn_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39396
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 and m in 1..4, with a
// single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39416
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 8 and ks set to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39432
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..4 with ks set to
// 3, running a single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39453
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m = 4 and ks set to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39471
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m = 4 and ks set
// to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39489
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..4 with cm_stride
// set to 11, running a single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39510
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 8, ks = 3 and a_offset set
// to 163.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39527
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3 with m = 4, n = 8,
// ks = 3 and a_offset set to 163.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39547
// Single run with m = 4, n = 8, k = 8 and qmin set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39561
// Single run with m = 4, n = 8, k = 8 and qmax set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39575
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39589 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
39590
39591
39592 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Single run with m = 4, n = 8, k = 8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39605
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39619
// Sweeps n over 1..8 and m over 1..4 with k = 8, running a single iteration
// per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39637
// Sweeps m over 1..4 with n = 8 and k = 8, one iteration per value.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39653
// Sweeps n over 1..8 with m = 4 and k = 8, one iteration per value.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39669
// Sweeps k over 1..7 with m = 4 and n = 8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39684
// Sweeps k over 1..7, n over 1..8 and m over 1..4, running a single iteration
// per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39704
// Sweeps k over 9..15 with m = 4 and n = 8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39719
// Sweeps k over 9..15, n over 1..8 and m over 1..4, running a single iteration
// per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39739
// Sweeps k over 16, 24, ..., 80 with m = 4 and n = 8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39754
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..4, running a single
// iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39774
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m fixed at 4.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39791
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m = 4 and cn_stride
// set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39809
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 and m over 1..4, running a
// single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39829
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m fixed at 4.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39846
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m = 4 and
// cn_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39864
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 and m in 1..4, with a
// single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39884
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 8 and ks set to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39900
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..4 with ks set to
// 3, running a single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39921
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m = 4 and ks set to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39939
// Runs n = 16 and n = 24 against k in 1, 10, 19, 28, 37 with m = 4 and ks set
// to 3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39957
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..4 with cm_stride
// set to 11, running a single iteration per combination.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39978
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 8, ks = 3 and a_offset set
// to 163.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39995
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3 with m = 4, n = 8,
// ks = 3 and a_offset set to 163.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40015
// Single run with m = 4, n = 8, k = 8 and qmin set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40029
// Single run with m = 4, n = 8, k = 8 and qmax set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40043
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40057 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
40058