1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-gemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single problem with m = 1, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
40
// Single problem with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
54
// Single problem with m = 1, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
68
// Sweeps n over [1, 8] and m over [1, 1] with k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
86
// Sweeps m over [1, 1] with n = 8 and k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
102
// Sweeps n over [1, 8] with m = 1 and k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
118
// Sweeps k over [1, 7] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
133
// Sweeps k over [1, 7] with m = 1, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
149
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
169
// Sweeps k over [9, 15] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
184
// Sweeps k over [9, 15] with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
200
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
220
// Sweeps k over 16, 24, ..., 80 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
235
// Sweeps k over 16, 24, ..., 80 with m = 1, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
251
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
271
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
288
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
306
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
324
// Sweeps n over [9, 15], k over 1, 10, ..., 37 and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
344
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
361
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
379
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
397
// Sweeps n over 16, 24, k over 1, 10, ..., 37 and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
417
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 1] with cm_stride set to 11,
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
438
// Single problem with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
452
// Single problem with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
466
// Single problem with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
480 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
481
482
483 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single problem with m = 1, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
496
// Single problem with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
510
// Single problem with m = 1, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
524
// Sweeps n over [1, 8] and m over [1, 1] with k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
542
// Sweeps m over [1, 1] with n = 8 and k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
558
// Sweeps n over [1, 8] with m = 1 and k = 8, iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
574
// Sweeps k over [1, 7] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
589
// Sweeps k over [1, 7] with m = 1, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
605
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
625
// Sweeps k over [9, 15] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
640
// Sweeps k over [9, 15] with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
656
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
676
// Sweeps k over 16, 24, ..., 80 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
691
// Sweeps k over 16, 24, ..., 80 with m = 1, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
707
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
727
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
744
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
762
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
780
// Sweeps n over [9, 15], k over 1, 10, ..., 37 and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
800
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
817
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
835
// Sweeps n over 16, 24 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
853
// Sweeps n over 16, 24, k over 1, 10, ..., 37 and m over [1, 1], iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
873
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 1] with cm_stride set to 11,
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
894
// Single problem with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
908
// Single problem with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
922
// Single problem with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
936 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
937
938
939 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single problem with m = 4, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
952
// Single problem with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
966
// Single problem with m = 4, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
980
// Every n in [1, 8] and m in [1, 4] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
998
// Every m in [1, 4] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1014
// Every n in [1, 8] with m = 4 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1030
// Full 4x8 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1045
// Full 4x8 tile for every k in [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1061
// Every k in [1, 7], n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1081
// Full 4x8 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1096
// Full 4x8 tile for every k in [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1112
// Every k in [9, 15], n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1132
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1147
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1163
// k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1183
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1200
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}, cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1218
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1236
// Every n in [9, 15], k in {1, 10, 19, 28, 37} and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1256
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1273
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}, cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1291
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1309
// n in {16, 24}, k in {1, 10, 19, 28, 37} and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1329
// k in {1, 10, 19, 28, 37}, n in [1, 8] and m in [1, 4] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1350
// Full 4x8 tile with k = 8 and qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1364
// Full 4x8 tile with k = 8 and qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1378
// Full 4x8 tile with k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1392 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1393
1394
1395 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile with k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1408
// Full 4x8 tile with k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1422
// Full 4x8 tile with k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1436
// Every n in [1, 8] and m in [1, 4] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1454
// Every m in [1, 4] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1470
// Every n in [1, 8] with m = 4 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1486
// Full 4x8 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1501
// Full 4x8 tile for every k in [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1517
// Every k in [1, 7], n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1537
// Full 4x8 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1552
// Full 4x8 tile for every k in [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1568
// Every k in [9, 15], n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1588
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1603
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1619
// k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1639
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1656
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}, cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1674
// m = 4 with every n in [9, 15], for k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1692
// Every n in [9, 15], k in {1, 10, 19, 28, 37} and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1712
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1729
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}, cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1747
// m = 4 with n in {16, 24}, for k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1765
// n in {16, 24}, k in {1, 10, 19, 28, 37} and m in [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1785
// k in {1, 10, 19, 28, 37}, n in [1, 8] and m in [1, 4] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1806
// Full 4x8 tile with k = 8 and qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1820
// Full 4x8 tile with k = 8 and qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1834
// Full 4x8 tile with k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1848 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1849
1850
1851 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile with k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1864
// Full 4x8 tile with k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1878
// Full 4x8 tile with k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1892
// Every n in [1, 8] and m in [1, 4] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1910
// Every m in [1, 4] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1926
// Sweeps n over [1, 8] with m=4 and k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1942
// Sweeps k over [1, 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1957
// Sweeps k over [1, 8) with m=4, n=8 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1973
// Sweeps k over [1, 8), n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1993
// Sweeps k over [9, 16) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2008
// Sweeps k over [9, 16) with m=4, n=8 and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2024
// Sweeps k over [9, 16), n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2044
// Sweeps k over 16, 24, ..., 80 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2059
// Sweeps k over 16, 24, ..., 80 with m=4, n=8 and a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2075
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2095
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2112
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4 and cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2130
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4 and a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2148
// Sweeps n over [9, 16), k over 1, 10, ..., 37 and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2168
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2185
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4 and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2203
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4 and
// a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2221
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2241
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 4] with
// cm_stride(11), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2262
// Single tester invocation with m=4, n=8, k=8 and qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2276
// Single tester invocation with m=4, n=8, k=8 and qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2290
// Single tester invocation with m=4, n=8, k=8 and cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2304 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2305
2306
2307 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single tester invocation with m=4, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2320
// Single tester invocation with m=4, n=8, k=8 and cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2334
// Single tester invocation with m=4, n=8, k=8 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2348
// Sweeps every (n, m) pair with n in [1, 8] and m in [1, 4] at k=8,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2366
// Sweeps m over [1, 4] with n=8 and k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2382
// Sweeps n over [1, 8] with m=4 and k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2398
// Sweeps k over [1, 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2413
// Sweeps k over [1, 8) with m=4, n=8 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2429
// Sweeps k over [1, 8), n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2449
// Sweeps k over [9, 16) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2464
// Sweeps k over [9, 16) with m=4, n=8 and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2480
// Sweeps k over [9, 16), n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2500
// Sweeps k over 16, 24, ..., 80 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2515
// Sweeps k over 16, 24, ..., 80 with m=4, n=8 and a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2531
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2551
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2568
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4 and cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2586
// Sweeps n over [9, 16) and k over 1, 10, ..., 37 with m=4 and a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2604
// Sweeps n over [9, 16), k over 1, 10, ..., 37 and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2624
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2641
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4 and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2659
// Sweeps n over 16 and 24, and k over 1, 10, ..., 37, with m=4 and
// a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2677
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 and m over [1, 4],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2697
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 4] with
// cm_stride(11), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2718
// Single tester invocation with m=4, n=8, k=8 and qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2732
// Single tester invocation with m=4, n=8, k=8 and qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2746
// Single tester invocation with m=4, n=8, k=8 and cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2760 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2761
2762
2763 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Single tester invocation with kr=4, m=4, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2776
// Single tester invocation with kr=4, m=4, n=8, k=8 and cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2790
// Single tester invocation with kr=4, m=4, n=8, k=8 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2804
// Sweeps every (n, m) pair with n in [1, 8] and m in [1, 4] at k=8 (kr=4),
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2822
// Sweeps m over [1, 4] with n=8 and k=8 (kr=4), one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2838
// Sweeps n over [1, 8] with m=4 and k=8 (kr=4), one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2854
// Sweeps k over [1, 8) with kr=4, m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2869
// Sweeps k over [1, 8) with kr=4, m=4, n=8 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2885
// Sweeps k over [1, 8), n over [1, 8] and m over [1, 4] (kr=4),
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2905
// Runs the tester for every k in [9, 16) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2920
// Runs the tester for every k in [9, 16) with m=4, n=8 and a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_len)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2936
// Runs one tester iteration for every (k, n, m) with k in [9, 16),
// n in [1, 8] and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 4; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2956
// Runs the tester for k = 16, 24, ..., 80 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2971
// Runs the tester for k = 16, 24, ..., 80 with m=4, n=8 and a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_len)
        .a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2987
// Runs one tester iteration for every (k, n, m) with k = 16, 24, ..., 80,
// n in [1, 8] and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 4; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3007
// Runs the tester for every n in [9, 16) and k = 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3024
// Runs the tester for every n in [9, 16) and k = 1, 10, ..., 37 with m=4
// and cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3042
// Runs the tester for every n in [9, 16) and k = 1, 10, ..., 37 with m=4
// and a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3060
// Runs one tester iteration for every (n, k, m) with n in [9, 16),
// k = 1, 10, ..., 37 and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t m_len = 1; m_len <= 4; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3080
// Runs the tester for n = 16, 24 and k = 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3097
// Runs the tester for n = 16, 24 and k = 1, 10, ..., 37 with m=4 and
// cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3115
// Runs the tester for n = 16, 24 and k = 1, 10, ..., 37 with m=4 and
// a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_len).k(k_len)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3133
// Runs one tester iteration for every (n, k, m) with n = 16, 24,
// k = 1, 10, ..., 37 and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t m_len = 1; m_len <= 4; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3153
// Runs one tester iteration with cm_stride=11 for every (k, n, m) with
// k = 1, 10, ..., 37, n in [1, 8] and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 4; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3174
// Single tester run with m=4, n=8, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3188
// Single tester run with m=4, n=8, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3202
// Single tester run with m=4, n=8, k=8 and cm_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3216 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
3217
3218
3219 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single tester run with m=1, n=8 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3232
// Single tester run with m=1, n=8, k=16 and cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3246
// Single tester run with m=1, n=8, k=16 and a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3260
// Runs one tester iteration for every (n, m) with n in [1, 8] and
// m in [1, 1], at k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
    for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_len).n(n_len).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3278
// Runs one tester iteration for every m in [1, 1] with n=8 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_len).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3294
// Runs one tester iteration for every n in [1, 8] with m=1 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_len).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3310
// Runs the tester for every k in [1, 16) with m=1 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3325
// Runs the tester for every k in [1, 16) with m=1, n=8 and a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3341
// Runs one tester iteration for every (k, n, m) with k in [1, 16),
// n in [1, 8] and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3361
// Runs the tester for every k in [17, 32) with m=1 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3376
// Runs the tester for every k in [17, 32) with m=1, n=8 and a_stride=37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len)
        .a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3392
// Runs one tester iteration for every (k, n, m) with k in [17, 32),
// n in [1, 8] and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3412
// Runs the tester for k = 32, 48, ..., 160 with m=1 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 32; k_len <= 160; k_len += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3427
// Runs the tester for k = 32, 48, ..., 160 with m=1, n=8 and a_stride=163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 32; k_len <= 160; k_len += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_len)
        .a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3443
// Runs one tester iteration for every (k, n, m) with k = 32, 48, ..., 160,
// n in [1, 8] and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 32; k_len <= 160; k_len += 16) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3463
// Runs the tester for every n in [9, 16) and k = 1, 18, ..., 69 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3480
// Runs the tester for every n in [9, 16) and k = 1, 18, ..., 69 with m=1
// and cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3498
// Runs the tester for every n in [9, 16) and k = 1, 18, ..., 69 with m=1
// and a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3516
// Runs one tester iteration for every (n, k, m) with n in [9, 16),
// k = 1, 18, ..., 69 and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 9; n_len < 16; ++n_len) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3536
// Runs the tester for n = 16, 24 and k = 1, 18, ..., 69 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3553
// Runs the tester for n = 16, 24 and k = 1, 18, ..., 69 with m=1 and
// cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3571
// Runs the tester for n = 16, 24 and k = 1, 18, ..., 69 with m=1 and
// a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_len).k(k_len)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3589
// Runs one tester iteration for every (n, k, m) with n = 16, 24,
// k = 1, 18, ..., 69 and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 16; n_len <= 24; n_len += 8) {
    for (size_t k_len = 1; k_len <= 80; k_len += 17) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3609
// Runs one tester iteration with cm_stride=11 for every (k, n, m) with
// k = 1, 18, ..., 69, n in [1, 8] and m in [1, 1].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len <= 80; k_len += 17) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 1; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3630
// Single tester run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3644
// Single tester run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3658
// Single tester run with m=1, n=8, k=16 and cm_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3672 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3673
3674
3675 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single tester run with m=2, n=8 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3688
// Single tester run with m=2, n=8, k=16 and cn_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3702
// Single tester run with m=2, n=8, k=16 and a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3716
// Runs one tester iteration for every (n, m) with n in [1, 8] and
// m in [1, 2], at k=16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
    for (uint32_t m_len = 1; m_len <= 2; ++m_len) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_len).n(n_len).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3734
// Runs one tester iteration for every m in [1, 2] with n=8 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_len = 1; m_len <= 2; ++m_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_len).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3750
// Runs one tester iteration for every n in [1, 8] with m=2 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_len).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3766
// Runs the tester for every k in [1, 16) with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3781
// Runs the tester for every k in [1, 16) with m=2, n=8 and a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3797
// Runs one tester iteration for every (k, n, m) with k in [1, 16),
// n in [1, 8] and m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 1; k_len < 16; ++k_len) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 2; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3817
// Runs the tester for every k in [17, 32) with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3832
// Runs the tester for every k in [17, 32) with m=2, n=8 and a_stride=37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len)
        .a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3848
// Runs one tester iteration for every (k, n, m) with k in [17, 32),
// n in [1, 8] and m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 17; k_len < 32; ++k_len) {
    for (uint32_t n_len = 1; n_len <= 8; ++n_len) {
      for (uint32_t m_len = 1; m_len <= 2; ++m_len) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_len).n(n_len).k(k_len)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3868
// Runs the tester for k = 32, 48, ..., 160 with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 32; k_len <= 160; k_len += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3883
// Runs the tester for k = 32, 48, ..., 160 with m=2, n=8 and a_stride=163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_len = 32; k_len <= 160; k_len += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_len)
        .a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3899
// Runs the kernel for k = 32, 48, ..., 160 over every (n, m) with n in 1..8
// and m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3919
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3936
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3954
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3972
// Runs the kernel for each n in 9..15, k = 1, 18, 35, 52, 69 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3992
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4009
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4027
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4045
// Runs the kernel for n = 16, 24, k = 1, 18, 35, 52, 69 and m in 1..2, using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4065
// Runs the kernel for k = 1, 18, 35, 52, 69 over every (n, m) with n in 1..8
// and m in 1..2, with cm_stride(11) and iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 80; kk += 17) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4086
// Single run at m=2, n=8, k=16 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4100
// Single run at m=2, n=8, k=16 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4114
// Single run at m=2, n=8, k=16 with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4128 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4129
4130
4131 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m=2, n=8, k=16 with no extra strides or clamping options set.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4144
// Single run at m=2, n=8, k=16 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4158
// Single run at m=2, n=8, k=16 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4172
// Runs the kernel at k=16 over every (n, m) with n in 1..8 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mm).n(nn).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4190
// Runs the kernel at n=8, k=16 for m in 1..2, using iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(mm).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4206
// Runs the kernel at m=2, k=16 for n in 1..8, using iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(nn).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4222
// Runs the kernel for each k in 1..15 at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4237
// Runs the kernel for each k in 1..15 at m=2, n=8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4253
// Runs the kernel for each k in 1..15 over every (n, m) with n in 1..8 and
// m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4273
// Runs the kernel for each k in 17..31 at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 17; kk < 32; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4288
// Runs the kernel for each k in 17..31 at m=2, n=8 with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 17; kk < 32; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4304
// Runs the kernel for each k in 17..31 over every (n, m) with n in 1..8 and
// m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 17; kk < 32; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4324
// Runs the kernel for k = 32, 48, ..., 160 (step 16) at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4339
// Runs the kernel for k = 32, 48, ..., 160 (step 16) at m=2, n=8 with
// a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4355
// Runs the kernel for k = 32, 48, ..., 160 over every (n, m) with n in 1..8
// and m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4375
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4392
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4410
// Runs the kernel for each n in 9..15 and k = 1, 18, 35, 52, 69 at m=2 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4428
// Runs the kernel for each n in 9..15, k = 1, 18, 35, 52, 69 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4448
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4465
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4483
// Runs the kernel for n = 16, 24 and k = 1, 18, 35, 52, 69 at m=2 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4501
// Runs the kernel for n = 16, 24, k = 1, 18, 35, 52, 69 and m in 1..2, using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4521
// Runs the kernel for k = 1, 18, 35, 52, 69 over every (n, m) with n in 1..8
// and m in 1..2, with cm_stride(11) and iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 80; kk += 17) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4542
// Single run at m=2, n=8, k=16 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4556
// Single run at m=2, n=8, k=16 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4570
// Single run at m=2, n=8, k=16 with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4584 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4585
4586
4587 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m=2, n=8, k=8 with no extra strides or clamping options set.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4600
// Single run at m=2, n=8, k=8 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4614
// Single run at m=2, n=8, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4628
// Runs the kernel at k=8 over every (n, m) with n in 1..8 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4646
// Runs the kernel at n=8, k=8 for m in 1..2, using iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(mm).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4662
// Runs the kernel at m=2, k=8 for n in 1..8, using iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4678
// Runs the kernel for each k in 1..7 at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4693
// Runs the kernel for each k in 1..7 at m=2, n=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4709
// Runs the kernel for each k in 1..7 over every (n, m) with n in 1..8 and
// m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4729
// Runs the kernel for each k in 9..15 at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4744
// Runs the kernel for each k in 9..15 at m=2, n=8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4760
// Runs the kernel for each k in 9..15 over every (n, m) with n in 1..8 and
// m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4780
// Runs the kernel for k = 16, 24, ..., 80 (step 8) at m=2, n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4795
// Runs the kernel for k = 16, 24, ..., 80 (step 8) at m=2, n=8 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kk)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4811
// Runs the kernel for k = 16, 24, ..., 80 over every (n, m) with n in 1..8
// and m in 1..2, using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4831
// Runs the kernel for each n in 9..15 and k = 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4848
// Runs the kernel for each n in 9..15 and k = 1, 10, 19, 28, 37 at m=2 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4866
// Runs the kernel for each n in 9..15 and k = 1, 10, 19, 28, 37 at m=2 with
// a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4884
// Runs the kernel for each n in 9..15, k = 1, 10, 19, 28, 37 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4904
// 2x8c8 neon_mull: n = 16 and 24 (multiples of nr = 8) against
// k = 1, 10, 19, 28, 37 with the full m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4921
// 2x8c8 neon_mull: n = 16 and 24 against k = 1, 10, 19, 28, 37,
// with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4939
// 2x8c8 neon_mull: n = 16 and 24 against k = 1, 10, 19, 28, 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4957
// 2x8c8 neon_mull: n = 16 and 24, k = 1, 10, 19, 28, 37 and every m in 1..2,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4977
// 2x8c8 neon_mull: k = 1, 10, 19, 28, 37 with every n in 1..8 and m in 1..2,
// using cm_stride = 11 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4998
// 2x8c8 neon_mull: single full-tile case (m = 2, n = 8, k = 8) with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5012
// 2x8c8 neon_mull: single full-tile case (m = 2, n = 8, k = 8) with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5026
// 2x8c8 neon_mull: single full-tile case (m = 2, n = 8, k = 8) with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mull,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5040 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5041
5042
5043 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x16c4 neondot_ld32: single full-tile case with m = 4, n = 16, k = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5056
// 4x16c4 neondot_ld32: full tile (m = 4, n = 16, k = 4) with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5070
// 4x16c4 neondot_ld32: full tile (m = 4, n = 16, k = 4) with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .a_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5084
// 4x16c4 neondot_ld32: k fixed at 4, every n in 1..16 crossed with every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5102
// 4x16c4 neondot_ld32: n = 16 and k = 4 fixed, m varied over 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(4)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5118
// 4x16c4 neondot_ld32: m = 4 and k = 4 fixed, n varied over 1..16,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_size).k(4)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5134
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 1..3 (below 4).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5149
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 1..3 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(7)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5165
// 4x16c4 neondot_ld32: k = 1..3 crossed with every n in 1..16 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5185
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 5..7 (between 4 and 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5200
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 5..7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5216
// 4x16c4 neondot_ld32: k = 5..7 crossed with every n in 1..16 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5236
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5251
// 4x16c4 neondot_ld32: full m = 4, n = 16 tile with k = 8, 12, ..., 40
// and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(43)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5267
// 4x16c4 neondot_ld32: k = 8, 12, ..., 40 crossed with every n in 1..16 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// 4x16c4 neondot_ld32: n = 17..31 (above nr = 16) against k = 1, 6, 11, 16
// with the full m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5304
// 4x16c4 neondot_ld32: n = 17..31 against k = 1, 6, 11, 16 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5322
// 4x16c4 neondot_ld32: n = 17..31 against k = 1, 6, 11, 16 with a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(23)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5340
// 4x16c4 neondot_ld32: n = 17..31, k = 1, 6, 11, 16 and every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5360
// 4x16c4 neondot_ld32: n = 32 and 48 (multiples of nr = 16) against
// k = 1, 6, 11, 16 with the full m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5377
// 4x16c4 neondot_ld32: n = 32 and 48 against k = 1, 6, 11, 16 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5395
// 4x16c4 neondot_ld32: n = 32 and 48 against k = 1, 6, 11, 16 with a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(23)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5413
// 4x16c4 neondot_ld32: n = 32 and 48, k = 1, 6, 11, 16 and every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5433
// 4x16c4 neondot_ld32: k = 1, 6, 11, 16 with every n in 1..16 and m in 1..4,
// using cm_stride = 19 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5454
// 4x16c4 neondot_ld32: full tile (m = 4, n = 16, k = 4) with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5468
// 4x16c4 neondot_ld32: full tile (m = 4, n = 16, k = 4) with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5482
// 4x16c4 neondot_ld32: full tile (m = 4, n = 16, k = 4) with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(4)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5496 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5497
5498
5499 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x16c4 neondot_ld128: single full-tile case with m = 4, n = 16, k = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5512
// 4x16c4 neondot_ld128: full tile (m = 4, n = 16, k = 16) with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5526
// 4x16c4 neondot_ld128: full tile (m = 4, n = 16, k = 16) with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5540
// 4x16c4 neondot_ld128: k fixed at 16, every n in 1..16 crossed with every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5558
// 4x16c4 neondot_ld128: n = 16 and k = 16 fixed, m varied over 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5574
// 4x16c4 neondot_ld128: m = 4 and k = 16 fixed, n varied over 1..16,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5590
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 1..15 (below 16).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5605
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 1..15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5621
// 4x16c4 neondot_ld128: k = 1..15 crossed with every n in 1..16 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5641
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 17..31 (between 16 and 32).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5656
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 17..31 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5672
// 4x16c4 neondot_ld128: k = 17..31 crossed with every n in 1..16 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5692
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 32, 48, ..., 160
// (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5707
// 4x16c4 neondot_ld128: full m = 4, n = 16 tile with k = 32, 48, ..., 160
// and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5723
// 4x16c4 neondot_ld128: k = 32, 48, ..., 160 crossed with every n in 1..16 and
// m in 1..4, each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5743
// 4x16c4 neondot_ld128: n = 17..31 (above nr = 16) against k = 1, 18, 35, 52, 69
// with the full m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5760
// 4x16c4 neondot_ld128: n = 17..31 against k = 1, 18, 35, 52, 69
// with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5778
// 4x16c4 neondot_ld128: n = 17..31 against k = 1, 18, 35, 52, 69
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5796
// 4x16c4 neondot_ld128: n = 17..31, k = 1, 18, 35, 52, 69 and every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5816
// 4x16c4 neondot_ld128: n = 32 and 48 (multiples of nr = 16) against
// k = 1, 18, 35, 52, 69 with the full m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5833
// 4x16c4 neondot_ld128: n = 32 and 48 against k = 1, 18, 35, 52, 69
// with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5851
// 4x16c4 neondot_ld128: n = 32 and 48 against k = 1, 18, 35, 52, 69
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5869
// 4x16c4 neondot_ld128: n = 32 and 48, k = 1, 18, 35, 52, 69 and every m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5889
// 4x16c4 neondot_ld128: k = 1, 18, 35, 52, 69 with every n in 1..16 and m in 1..4,
// using cm_stride = 19 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5910
// 4x16c4 neondot ld128 ukernel: m=4, n=16, k=16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
5924
// 4x16c4 neondot ld128 ukernel: m=4, n=16, k=16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
5938
// 4x16c4 neondot ld128 ukernel: m=4, n=16, k=16 with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
5952 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5953
5954
5955 #if XNN_ARCH_ARM
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
5968
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4 with cn_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
5982
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4 with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .a_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
5996
// 1x1c4 armsimd32 ukernel: k=4 over n in 1..1 and m in 1..1 (a single
// combination for this tile size), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6014
// 1x1c4 armsimd32 ukernel: n=1, k=4 over m in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(m_dim).n(1).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6030
// 1x1c4 armsimd32 ukernel: m=1, k=4 over n in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6046
// 1x1c4 armsimd32 ukernel: m=1, n=1 for each k in {1, 2, 3}.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6061
// 1x1c4 armsimd32 ukernel: m=1, n=1 for each k in {1, 2, 3}, a_stride 7.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .a_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6077
// 1x1c4 armsimd32 ukernel: k in {1, 2, 3} over n in 1..1 and m in 1..1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6097
// 1x1c4 armsimd32 ukernel: m=1, n=1 for each k in {5, 6, 7}.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6112
// 1x1c4 armsimd32 ukernel: m=1, n=1 for each k in {5, 6, 7}, a_stride 11.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6128
// 1x1c4 armsimd32 ukernel: k in {5, 6, 7} over n in 1..1 and m in 1..1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6148
// 1x1c4 armsimd32 ukernel: m=1, n=1 for k = 8, 12, ..., 40.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6163
// 1x1c4 armsimd32 ukernel: m=1, n=1 for k = 8, 12, ..., 40, a_stride 43.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6179
// 1x1c4 armsimd32 ukernel: k = 8, 12, ..., 40 over n in 1..1 and m in 1..1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6199
// 1x1c4 armsimd32 ukernel: n_gt_1 sweep with m=1 and k in {1, 6, 11, 16}.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6216
// 1x1c4 armsimd32 ukernel: n_gt_1 sweep with m=1, k in {1, 6, 11, 16} and
// cn_stride 3.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(3)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6234
// 1x1c4 armsimd32 ukernel: n_gt_1 sweep with m=1, k in {1, 6, 11, 16} and
// a_stride 23.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6252
// 1x1c4 armsimd32 ukernel: n_gt_1 sweep with k in {1, 6, 11, 16} and m in
// 1..1, one iteration per configuration.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6272
// 1x1c4 armsimd32 ukernel: m=1, n in {2, 3}, k in {1, 6, 11, 16}.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6289
// 1x1c4 armsimd32 ukernel: m=1, n in {2, 3}, k in {1, 6, 11, 16}, with
// cn_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(3)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6307
// 1x1c4 armsimd32 ukernel: m=1, n in {2, 3}, k in {1, 6, 11, 16}, with
// a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6325
// 1x1c4 armsimd32 ukernel: n in {2, 3}, k in {1, 6, 11, 16}, m in 1..1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6345
// 1x1c4 armsimd32 ukernel: k in {1, 6, 11, 16}, n in 1..1, m in 1..1, with
// cm_stride set to 3 and one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6366
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6380
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6394
// 1x1c4 armsimd32 ukernel: m=1, n=1, k=4 with cm_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6408 #endif // XNN_ARCH_ARM
6409
6410
6411 #if XNN_ARCH_ARM
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6424
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4 with cn_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6438
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4 with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .a_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6452
// 2x1c4 armsimd32 ukernel: k=4 over n in 1..1 and m in {1, 2}, one
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6470
// 2x1c4 armsimd32 ukernel: n=1, k=4 over m in {1, 2}, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(m_dim).n(1).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6486
// 2x1c4 armsimd32 ukernel: m=2, k=4 over n in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6502
// 2x1c4 armsimd32 ukernel: m=2, n=1 for each k in {1, 2, 3}.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6517
// 2x1c4 armsimd32 ukernel: m=2, n=1 for each k in {1, 2, 3}, a_stride 7.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .a_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6533
// 2x1c4 armsimd32 ukernel: k in {1, 2, 3} over n in 1..1 and m in {1, 2},
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6553
// 2x1c4 armsimd32 ukernel: m=2, n=1 for each k in {5, 6, 7}.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6568
// 2x1c4 armsimd32 ukernel: m=2, n=1 for each k in {5, 6, 7}, a_stride 11.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6584
// 2x1c4 armsimd32 ukernel: k in {5, 6, 7} over n in 1..1 and m in {1, 2},
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6604
// 2x1c4 armsimd32 ukernel: m=2, n=1 for k = 8, 12, ..., 40.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6619
// 2x1c4 armsimd32 ukernel: m=2, n=1 for k = 8, 12, ..., 40, a_stride 43.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6635
// 2x1c4 armsimd32 ukernel: k = 8, 12, ..., 40 over n in 1..1 and m in
// {1, 2}, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6655
// 2x1c4 armsimd32 ukernel: n_gt_1 sweep with m=2 and k in {1, 6, 11, 16}.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6672
// 2x1c4 armsimd32 ukernel: n_gt_1 sweep with m=2, k in {1, 6, 11, 16} and
// cn_stride 3.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(3)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6690
// 2x1c4 armsimd32 ukernel: n_gt_1 sweep with m=2, k in {1, 6, 11, 16} and
// a_stride 23.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6708
// 2x1c4 armsimd32 ukernel: n_gt_1 sweep with k in {1, 6, 11, 16} and m in
// {1, 2}, one iteration per configuration.
// NOTE: the n range starts at 2 and requires n < 2, so it is empty and the
// tester is never invoked by this test.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim < 2; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6728
// 2x1c4 armsimd32 ukernel: m=2, n in {2, 3}, k in {1, 6, 11, 16}.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6745
// 2x1c4 armsimd32 ukernel: m=2, n in {2, 3}, k in {1, 6, 11, 16}, with
// cn_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(3)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6763
// 2x1c4 armsimd32 ukernel: m=2, n in {2, 3}, k in {1, 6, 11, 16}, with
// a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6781
// 2x1c4 armsimd32 ukernel: n in {2, 3}, k in {1, 6, 11, 16}, m in {1, 2},
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 2; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6801
// 2x1c4 armsimd32 ukernel: k in {1, 6, 11, 16}, n in 1..1, m in {1, 2},
// with cm_stride set to 3 and one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6822
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6836
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6850
// 2x1c4 armsimd32 ukernel: m=2, n=1, k=4 with cm_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6864 #endif // XNN_ARCH_ARM
6865
6866
6867 #if XNN_ARCH_ARM
// 2x2c4 armsimd32 ukernel: m=2, n=2, k=4.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6880
// 2x2c4 armsimd32 ukernel: m=2, n=2, k=4 with cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .cn_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6894
// 2x2c4 armsimd32 ukernel: m=2, n=2, k=4 with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .a_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6908
// 2x2c4 armsimd32 ukernel: k=4 over n in {1, 2} and m in {1, 2}, one
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6926
// 2x2c4 armsimd32 ukernel: n=2, k=4 over m in {1, 2}, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(m_dim).n(2).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6942
// 2x2c4 armsimd32 ukernel: m=2, k=4 over n in {1, 2}, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6958
// 2x2c4 armsimd32 ukernel: m=2, n=2 for each k in {1, 2, 3}.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
6973
// Sweeps k in [1, 3] on the full 2x2 tile with a_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6989
// Sweeps k in [1, 3], n in [1, 2] and m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7009
// Sweeps k in [5, 7] on the full 2x2 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
7024
// Sweeps k in [5, 7] on the full 2x2 tile with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
7040
// Sweeps k in [5, 7], n in [1, 2] and m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7060
// Sweeps k over 8, 12, ..., 40 (step 4) on the full 2x2 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
7075
// Sweeps k over 8, 12, ..., 40 (step 4) on the full 2x2 tile with a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_stride(43)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
7091
// Sweeps k over 8, 12, ..., 40 (step 4), n in [1, 2] and m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7111
// Runs n = 3 (the only value in [3, 4)) with k over 1, 6, 11, 16 (step 5) and m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7128
// Runs n = 3 with k over 1, 6, 11, 16 (step 5), m = 2 and cn_stride = 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7146
// Runs n = 3 with k over 1, 6, 11, 16 (step 5), m = 2 and a_stride = 23.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7164
// Runs n = 3 with k over 1, 6, 11, 16 (step 5) and m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7184
// Sweeps n over 4 and 6 (step 2) with k over 1, 6, 11, 16 (step 5) and m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7201
// Sweeps n over 4 and 6 (step 2) with k over 1, 6, 11, 16 (step 5), m = 2 and cn_stride = 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7219
// Sweeps n over 4 and 6 (step 2) with k over 1, 6, 11, 16 (step 5), m = 2 and a_stride = 23.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
7237
// Sweeps n over 4 and 6 (step 2), k over 1, 6, 11, 16 (step 5) and m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7257
// Sweeps k over 1, 6, 11, 16 (step 5), n in [1, 2] and m in [1, 2] with cm_stride = 5 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7278
// Runs the full 2x2 tile at k = 4 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
7292
// Runs the full 2x2 tile at k = 4 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
7306
// Runs the full 2x2 tile at k = 4 with cm_stride = 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .cm_stride(5)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
7320 #endif // XNN_ARCH_ARM
7321
7322
7323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 1x8 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7336
// Runs the full 1x8 tile at k = 8 with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7350
// Runs the full 1x8 tile at k = 8 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7364
// Sweeps n in [1, 8] and m in [1, 1] at k = 8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7382
// Sweeps m in [1, 1] with n = 8 and k = 8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7398
// Sweeps n in [1, 8] with m = 1 and k = 8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7414
// Sweeps k in [1, 7] on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7429
// Sweeps k in [1, 7] on the full 1x8 tile with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7445
// Sweeps k in [1, 7], n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7465
// Sweeps k in [9, 15] on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7480
// Sweeps k in [9, 15] on the full 1x8 tile with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7496
// Sweeps k in [9, 15], n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7516
// Sweeps k over 16, 24, ..., 80 (step 8) on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7531
// Sweeps k over 16, 24, ..., 80 (step 8) on the full 1x8 tile with a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
7547
// Sweeps k over 16, 24, ..., 80 (step 8), n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7567
// Sweeps n in [9, 15] with k over 1, 10, 19, 28, 37 (step 9) and m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7584
// Sweeps n in [9, 15] with k over 1, 10, 19, 28, 37 (step 9), m = 1 and cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7602
// Sweeps n in [9, 15] with k over 1, 10, 19, 28, 37 (step 9), m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7620
// Sweeps n in [9, 15], k over 1, 10, 19, 28, 37 (step 9) and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7640
// Sweeps n over 16 and 24 (step 8) with k over 1, 10, 19, 28, 37 (step 9) and m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7657
// Sweeps n over 16 and 24 (step 8) with k over 1, 10, 19, 28, 37 (step 9), m = 1 and cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7675
// Sweeps n over 16 and 24 (step 8) with k over 1, 10, 19, 28, 37 (step 9), m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
7693
// Sweeps n over 16 and 24 (step 8), k over 1, 10, 19, 28, 37 (step 9) and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7713
// Sweeps k over 1, 10, 19, 28, 37 (step 9), n in [1, 8] and m in [1, 1] with cm_stride = 11 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7734
// Runs the full 1x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7748
// Runs the full 1x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7762
// Runs the full 1x8 tile at k = 8 with cm_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7776 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7777
7778
7779 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 1x8 tile at k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7792
// Runs the full 1x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7806
// Runs the full 1x8 tile at k = 16 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7820
// Sweeps n in [1, 8] and m in [1, 1] at k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7838
// Sweeps m in [1, 1] with n = 8 and k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7854
// Sweeps n in [1, 8] with m = 1 and k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7870
// Sweeps k in [1, 15] on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7885
// Sweeps k in [1, 15] on the full 1x8 tile with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7901
// Sweeps k in [1, 15], n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7921
// Sweeps k in [17, 31] on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7936
// Sweeps k in [17, 31] on the full 1x8 tile with a_stride = 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7952
// Sweeps k in [17, 31], n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7972
// Sweeps k over 32, 48, ..., 160 (step 16) on the full 1x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7987
// Sweeps k over 32, 48, ..., 160 (step 16) on the full 1x8 tile with a_stride = 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8003
// Sweeps k over 32, 48, ..., 160 (step 16), n in [1, 8] and m in [1, 1], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8023
// Sweeps n in [9, 15] with k over 1, 18, 35, 52, 69 (step 17) and m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8040
// Covers n in [9, 15] with m = 1 and cn_stride set to 11, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8058
// Covers n in [9, 15] with m = 1 and a_stride set to 83, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8076
// Covers n in [9, 15], k stepping by 17 from 1 up to 80, and m in [1, 1],
// running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8096
// Covers n = 16 and n = 24 with m = 1, stepping k by 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8113
// Covers n = 16 and n = 24 with m = 1 and cn_stride set to 11, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8131
// Covers n = 16 and n = 24 with m = 1 and a_stride set to 83, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8149
// Covers n = 16 and n = 24, k stepping by 17 from 1 up to 80, and m in
// [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8169
// With cm_stride set to 11, covers k stepping by 17 from 1 up to 80, every n
// in [1, 8] and m in [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8190
// Single run with m = 1, n = 8, k = 16 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8204
// Single run with m = 1, n = 8, k = 16 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8218
// Single run with m = 1, n = 8, k = 16 and the tester's cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8233
8234
8235 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c2 mlal_ld2r ukernel with m = 1, n = 8, k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8248
// Single run with m = 1, n = 8, k = 16 and the tester's cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8262
// Single run with m = 1, n = 8, k = 16 and the tester's a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8276
// With k = 16, covers every n in [1, 8] and m in [1, 1], running a single
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8294
// With n = 8 and k = 16, covers m in [1, 1], one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8310
// With m = 1 and k = 16, covers every n in [1, 8], one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8326
// Runs with m = 1, n = 8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8341
// Runs with m = 1, n = 8 and a_stride set to 19 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8357
// For every k in [1, 15], covers every n in [1, 8] and m in [1, 1], running
// a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8377
// Runs with m = 1, n = 8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8392
// Runs with m = 1, n = 8 and a_stride set to 37 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8408
// For every k in [17, 31], covers every n in [1, 8] and m in [1, 1], running
// a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8428
// Runs with m = 1, n = 8 for each k that is a multiple of 16 in [32, 160].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8443
// Runs with m = 1, n = 8 and a_stride set to 163 for each k that is a
// multiple of 16 in [32, 160].
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8459
// For each k that is a multiple of 16 in [32, 160], covers every n in [1, 8]
// and m in [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8479
// Covers n in [9, 15] with m = 1, stepping k by 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8496
// Covers n in [9, 15] with m = 1 and cn_stride set to 11, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8514
// Covers n in [9, 15] with m = 1 and a_stride set to 83, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8532
// Covers n in [9, 15], k stepping by 17 from 1 up to 80, and m in [1, 1],
// running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8552
// Covers n = 16 and n = 24 with m = 1, stepping k by 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8569
// Covers n = 16 and n = 24 with m = 1 and cn_stride set to 11, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8587
// Covers n = 16 and n = 24 with m = 1 and a_stride set to 83, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8605
// Covers n = 16 and n = 24, k stepping by 17 from 1 up to 80, and m in
// [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8625
// With cm_stride set to 11, covers k stepping by 17 from 1 up to 80, every n
// in [1, 8] and m in [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8646
// Single run with m = 1, n = 8, k = 16 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8660
// Single run with m = 1, n = 8, k = 16 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8674
// Single run with m = 1, n = 8, k = 16 and the tester's cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8688 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8689
8690
8691 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c4 mlal_ld1r ukernel with m = 1, n = 8, k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8704
// Single run with m = 1, n = 8, k = 16 and the tester's cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8718
// Single run with m = 1, n = 8, k = 16 and the tester's a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8732
// With k = 16, covers every n in [1, 8] and m in [1, 1], running a single
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8750
// With n = 8 and k = 16, covers m in [1, 1], one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8766
// With m = 1 and k = 16, covers every n in [1, 8], one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8782
// Runs with m = 1, n = 8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8797
// Runs with m = 1, n = 8 and a_stride set to 19 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8813
// For every k in [1, 15], covers every n in [1, 8] and m in [1, 1], running
// a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8833
// Runs with m = 1, n = 8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8848
// Runs with m = 1, n = 8 and a_stride set to 37 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8864
// For every k in [17, 31], covers every n in [1, 8] and m in [1, 1], running
// a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8884
// Runs with m = 1, n = 8 for each k that is a multiple of 16 in [32, 160].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8899
// Runs with m = 1, n = 8 and a_stride set to 163 for each k that is a
// multiple of 16 in [32, 160].
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8915
// For each k that is a multiple of 16 in [32, 160], covers every n in [1, 8]
// and m in [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8935
// Covers n in [9, 15] with m = 1, stepping k by 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8952
// Covers n in [9, 15] with m = 1 and cn_stride set to 11, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8970
// Covers n in [9, 15] with m = 1 and a_stride set to 83, stepping k by 17
// from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8988
// Covers n in [9, 15], k stepping by 17 from 1 up to 80, and m in [1, 1],
// running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9008
// Covers n = 16 and n = 24 with m = 1, stepping k by 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9025
// Covers n = 16 and n = 24 with m = 1 and cn_stride set to 11, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9043
// Covers n = 16 and n = 24 with m = 1 and a_stride set to 83, stepping k by
// 17 from 1 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9061
// Covers n = 16 and n = 24, k stepping by 17 from 1 up to 80, and m in
// [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9081
// With cm_stride set to 11, covers k stepping by 17 from 1 up to 80, every n
// in [1, 8] and m in [1, 1], running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9102
// 1x8c4 NEON MLAL LD1R kernel: full 1x8 tile at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9116
// 1x8c4 NEON MLAL LD1R kernel: full 1x8 tile at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9130
// 1x8c4 NEON MLAL LD1R kernel: full 1x8 tile at k = 16 with cm_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9144 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9145
9146
9147 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16, no explicit strides set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9160
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9174
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9188
// 1x8c8 NEONv8 MLAL kernel: every n in 1..8 with m = 1 (the m loop runs once
// because mr = 1) at k = 16. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9206
// 1x8c8 NEONv8 MLAL kernel: sweeps m (only m = 1, since mr = 1) with n = 8 and
// k = 16. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9222
// 1x8c8 NEONv8 MLAL kernel: sweeps n over 1..8 with m = 1 and k = 16.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9238
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9253
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for every k in 1..15 with
// a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9269
// 1x8c8 NEONv8 MLAL kernel: every k in 1..15 crossed with n in 1..8 and m = 1.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9289
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9304
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for every k in 17..31 with
// a_stride = 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9320
// 1x8c8 NEONv8 MLAL kernel: every k in 17..31 crossed with n in 1..8 and m = 1.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9340
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for k = 32, 48, ..., 160
// (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9355
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile for k = 32, 48, ..., 160 with
// a_stride = 163 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9371
// 1x8c8 NEONv8 MLAL kernel: k = 32, 48, ..., 160 crossed with n in 1..8 and
// m = 1. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9391
// 1x8c8 NEONv8 MLAL kernel: n in 9..15 (between nr and 2 * nr) crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9408
// 1x8c8 NEONv8 MLAL kernel: n in 9..15 crossed with k in {1, 18, 35, 52, 69},
// with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9426
// 1x8c8 NEONv8 MLAL kernel: n in 9..15 crossed with k in {1, 18, 35, 52, 69},
// with a_stride = 83 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9444
// 1x8c8 NEONv8 MLAL kernel: n in 9..15 crossed with k in {1, 18, 35, 52, 69}
// and m = 1. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9464
// 1x8c8 NEONv8 MLAL kernel: n in {16, 24} (multiples of nr = 8) crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9481
// 1x8c8 NEONv8 MLAL kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}, with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9499
// 1x8c8 NEONv8 MLAL kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}, with a_stride = 83 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9517
// 1x8c8 NEONv8 MLAL kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69} and m = 1. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9537
// 1x8c8 NEONv8 MLAL kernel: k in {1, 18, 35, 52, 69} crossed with n in 1..8
// and m = 1, with cm_stride = 11. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9558
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9572
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9586
// 1x8c8 NEONv8 MLAL kernel: full 1x8 tile at k = 16 with cm_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9600 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9601
9602
9603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16, no explicit strides set.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9616
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9630
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9644
// 2x8c2 NEON MLAL LD1R kernel: every (m, n) with m in 1..2 and n in 1..8 at
// k = 16. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9662
// 2x8c2 NEON MLAL LD1R kernel: sweeps m over 1..2 with n = 8 and k = 16.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9678
// 2x8c2 NEON MLAL LD1R kernel: sweeps n over 1..8 with m = 2 and k = 16.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9694
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9709
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for every k in 1..15 with
// a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9725
// 2x8c2 NEON MLAL LD1R kernel: every k in 1..15 crossed with n in 1..8 and
// m in 1..2. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9745
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9760
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for every k in 17..31 with
// a_stride = 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9776
// 2x8c2 NEON MLAL LD1R kernel: every k in 17..31 crossed with n in 1..8 and
// m in 1..2. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9796
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for k = 32, 48, ..., 160
// (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9811
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile for k = 32, 48, ..., 160 with
// a_stride = 163 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9827
// 2x8c2 NEON MLAL LD1R kernel: k = 32, 48, ..., 160 crossed with n in 1..8 and
// m in 1..2. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9847
// 2x8c2 NEON MLAL LD1R kernel: n in 9..15 (between nr and 2 * nr) crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9864
// 2x8c2 NEON MLAL LD1R kernel: n in 9..15 crossed with
// k in {1, 18, 35, 52, 69}, with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9882
// 2x8c2 NEON MLAL LD1R kernel: n in 9..15 crossed with
// k in {1, 18, 35, 52, 69}, with a_stride = 83 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9900
// 2x8c2 NEON MLAL LD1R kernel: n in 9..15 crossed with
// k in {1, 18, 35, 52, 69} and m in 1..2. Each shape is run for a single
// iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9920
// 2x8c2 NEON MLAL LD1R kernel: n in {16, 24} (multiples of nr = 8) crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9937
// 2x8c2 NEON MLAL LD1R kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}, with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9955
// 2x8c2 NEON MLAL LD1R kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}, with a_stride = 83 (greater than every tested k).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9973
// 2x8c2 NEON MLAL LD1R kernel: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69} and m in 1..2. Each shape is run for a single
// iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9993
// 2x8c2 NEON MLAL LD1R kernel: k in {1, 18, 35, 52, 69} crossed with n in 1..8
// and m in 1..2, with cm_stride = 11. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10014
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10028
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10042
// 2x8c2 NEON MLAL LD1R kernel: full 2x8 tile at k = 16 with cm_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10056 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10057
10058
10059 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c2 NEON MLAL LD4R kernel: full 2x8 tile at k = 16, no explicit strides set.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10072
// 2x8c2 NEON MLAL LD4R kernel: full 2x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10086
// 2x8c2 NEON MLAL LD4R kernel: full 2x8 tile at k = 16 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10100
// 2x8c2 NEON MLAL LD4R kernel: every (m, n) with m in 1..2 and n in 1..8 at
// k = 16. Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10118
// 2x8c2 NEON MLAL LD4R kernel: sweeps m over 1..2 with n = 8 and k = 16.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10134
// 2x8c2 NEON MLAL LD4R kernel: sweeps n over 1..8 with m = 2 and k = 16.
// Each shape is run for a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10150
// m=2, n=8; runs every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10165
// m=2, n=8; runs every k in [1, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10181
// For every k in [1, 15]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10201
// m=2, n=8; runs every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10216
// m=2, n=8; runs every k in [17, 31] with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10232
// For every k in [17, 31]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10252
// m=2, n=8; runs k = 32, 48, ..., 160 (step 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10267
// m=2, n=8; runs k = 32, 48, ..., 160 (step 16) with a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10283
// For k = 32, 48, ..., 160: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10303
// m=2; for every n in [9, 15] runs k = 1, 18, 35, 52, 69 (step 17 up to 80).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10320
// m=2, cn_stride(11); for every n in [9, 15] runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10338
// m=2, a_stride(83); for every n in [9, 15] runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10356
// For every n in [9, 15] and k = 1, 18, 35, 52, 69: runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10376
// m=2; for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10393
// m=2, cn_stride(11); for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10411
// m=2, a_stride(83); for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10429
// For n = 16 and 24 and k = 1, 18, 35, 52, 69: runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10449
// cm_stride(11); for k = 1, 18, 35, 52, 69 runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Single run: m=2, n=8, k=16, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10484
// Single run: m=2, n=8, k=16, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10498
// Single run: m=2, n=8, k=16, with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10513
10514
10515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run: m=2, n=8, k=16, no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10528
// Single run: m=2, n=8, k=16, with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10542
// Single run: m=2, n=8, k=16, with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10556
// k=16; runs every n in [1, 8] (outer loop) with every m in [1, 2] (inner loop), iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10574
// n=8, k=16; runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10590
// m=2, k=16; runs every n in [1, 8], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10606
// m=2, n=8; runs every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10621
// m=2, n=8; runs every k in [1, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10637
// For every k in [1, 15]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10657
// m=2, n=8; runs every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10672
// m=2, n=8; runs every k in [17, 31] with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10688
// For every k in [17, 31]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10708
// m=2, n=8; runs k = 32, 48, ..., 160 (step 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10723
// m=2, n=8; runs k = 32, 48, ..., 160 (step 16) with a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10739
// For k = 32, 48, ..., 160: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10759
// m=2; for every n in [9, 15] runs k = 1, 18, 35, 52, 69 (step 17 up to 80).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10776
// m=2, cn_stride(11); for every n in [9, 15] runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10794
// m=2, a_stride(83); for every n in [9, 15] runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10812
// For every n in [9, 15] and k = 1, 18, 35, 52, 69: runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10832
// m=2; for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10849
// m=2, cn_stride(11); for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10867
// m=2, a_stride(83); for n = 16 and 24 runs k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10885
// For n = 16 and 24 and k = 1, 18, 35, 52, 69: runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10905
// cm_stride(11); for k = 1, 18, 35, 52, 69 runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10926
// Single run: m=2, n=8, k=16, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10940
// Single run: m=2, n=8, k=16, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10954
// Single run: m=2, n=8, k=16, with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10968 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10969
10970
10971 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run: m=2, n=8, k=16, no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10984
// Single run: m=2, n=8, k=16, with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10998
// Single run: m=2, n=8, k=16, with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
11012
// k=16; runs every n in [1, 8] (outer loop) with every m in [1, 2] (inner loop), iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11030
// n=8, k=16; runs every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11046
// m=2, k=16; runs every n in [1, 8], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11062
// m=2, n=8; runs every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11077
// m=2, n=8; runs every k in [1, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11093
// For every k in [1, 15]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11113
// m=2, n=8; runs every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11128
// m=2, n=8; runs every k in [17, 31] with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
11144
// For every k in [17, 31]: runs every n in [1, 8] with every m in [1, 2], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11164
// Runs the full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11179
// Runs the full 2x8 tile for k = 32, 48, ..., 160 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11195
// For k = 32, 48, ..., 160, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11215
// For every n in 9..15 (above nr = 8), runs m = 2 with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11232
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11250
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11268
// For every n in 9..15 and k = 1, 18, 35, 52, 69, runs m = 1 and m = 2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11288
// For n = 16 and n = 24 (multiples of nr = 8), runs m = 2 with
// k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11305
// For n = 16 and n = 24, runs m = 2 with k = 1, 18, 35, 52, 69 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11323
// For n = 16 and n = 24, runs m = 2 with k = 1, 18, 35, 52, 69 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11341
// For n = 16 and n = 24 and k = 1, 18, 35, 52, 69, runs m = 1 and m = 2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11361
// For k = 1, 18, 35, 52, 69, runs each output size n in 1..8 and m in 1..2
// with cm_stride set to 11 and one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11382
// Single run of the full 2x8 tile at k = 16 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11396
// Single run of the full 2x8 tile at k = 16 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11410
// Single run of the full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11424 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11425
11426
11427 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the full 2x8 tile (m = 2, n = 8) at k = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11440
// Single run of the full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11454
// Single run of the full 2x8 tile at k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11468
// At k = 16, runs each output size n in 1..8 and m in 1..2, with one tester
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11486
// At k = 16 and n = 8, runs m = 1 and m = 2, one tester iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11502
// At k = 16 and m = 2, runs every n in 1..8, one tester iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11518
// Runs the full 2x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 15; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11533
// Runs the full 2x8 tile for every k in 1..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 15; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11549
// For every k in 1..15, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 15; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11569
// Runs the full 2x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k <= 31; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11584
// Runs the full 2x8 tile for every k in 17..31 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k <= 31; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11600
// For every k in 17..31, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k <= 31; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11620
// Runs the full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11635
// Runs the full 2x8 tile for k = 32, 48, ..., 160 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11651
// For k = 32, 48, ..., 160, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11671
// For every n in 9..15 (above nr = 8), runs m = 2 with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11688
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11706
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11724
// For every n in 9..15 and k = 1, 18, 35, 52, 69, runs m = 1 and m = 2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11744
// For n = 16 and n = 24 (multiples of nr = 8), runs m = 2 with
// k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11761
// For n = 16 and n = 24, runs m = 2 with k = 1, 18, 35, 52, 69 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11779
// For n = 16 and n = 24, runs m = 2 with k = 1, 18, 35, 52, 69 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11797
// For n = 16 and n = 24 and k = 1, 18, 35, 52, 69, runs m = 1 and m = 2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11817
// For k = 1, 18, 35, 52, 69, runs each output size n in 1..8 and m in 1..2
// with cm_stride set to 11 and one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11838
// Single run of the full 2x8 tile at k = 16 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11852
// Single run of the full 2x8 tile at k = 16 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11866
// Single run of the full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11881
11882
11883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the full 2x8 tile (m = 2, n = 8) at k = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11896
// Single run of the full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11910
// Single run of the full 2x8 tile at k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11924
// At k = 16, runs each output size n in 1..8 and m in 1..2, with one tester
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11942
// At k = 16 and n = 8, runs m = 1 and m = 2, one tester iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11958
// At k = 16 and m = 2, runs every n in 1..8, one tester iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11974
// Runs the full 2x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 15; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11989
// Runs the full 2x8 tile for every k in 1..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 15; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12005
// For every k in 1..15, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 15; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12025
// Runs the full 2x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k <= 31; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12040
// Runs the full 2x8 tile for every k in 17..31 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k <= 31; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12056
// For every k in 17..31, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k <= 31; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12076
// Runs the full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12091
// Runs the full 2x8 tile for k = 32, 48, ..., 160 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k)
        .a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12107
// For k = 32, 48, ..., 160, runs each output size n in 1..8 and m in 1..2,
// with one tester iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12127
// For every n in 9..15 (above nr = 8), runs m = 2 with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12144
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12162
// For every n in 9..15, runs m = 2 with k = 1, 18, 35, 52, 69 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n <= 15; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12180
// Covers n in [9, 15], k in {1, 18, 35, 52, 69} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12200
// Runs m = 2 with n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12217
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2), but
// with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12235
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2), but
// with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12253
// Covers n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12273
// Covers k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with
// cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12294
// Single case (m = 2, n = 8, k = 16) with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12308
// Single case (m = 2, n = 8, k = 16) with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12322
// Single case (m = 2, n = 8, k = 16) with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12336 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12337
12338
12339 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 2, n = 8, k = 16, no explicit strides set.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12352
// Single case (m = 2, n = 8, k = 16) with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12366
// Single case (m = 2, n = 8, k = 16) with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12380
// Fixes k = 16 and covers every (n, m) with n in [1, 8] and m in [1, 2], each
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12398
// Fixes n = 8 and k = 16 while m takes the values 1 and 2; iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12414
// Fixes m = 2 and k = 16 while n runs over [1, 8]; iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12430
// Runs m = 2, n = 8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12445
// Runs m = 2, n = 8 for every k in [1, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12461
// Covers k in [1, 15], n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12481
// Runs m = 2, n = 8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12496
// Runs m = 2, n = 8 for every k in [17, 31] with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12512
// Covers k in [17, 31], n in [1, 8] and m in [1, 2], each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12532
// Runs m = 2, n = 8 for k = 32, 48, ..., 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12547
// Runs m = 2, n = 8 for k = 32, 48, ..., 160 with a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12563
// Sweeps k = 32, 48, ..., 160 against every (n, m) pair with n in [1, 8] and
// m in [1, 2]; each combination is run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12583
// Runs m = 2 with n in [9, 15] and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12600
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}, m = 2), but
// with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12618
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}, m = 2), but
// with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12636
// Covers n in [9, 15], k in {1, 18, 35, 52, 69} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12656
// Runs m = 2 with n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12673
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2), but
// with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12691
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2), but
// with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12709
// Covers n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12729
// Covers k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with
// cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12750
// Single case (m = 2, n = 8, k = 16) with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12764
// Single case (m = 2, n = 8, k = 16) with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12778
// Single case (m = 2, n = 8, k = 16) with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12792 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12793
12794
12795 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 2, n = 16, k = 8, no explicit strides set.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
12808
// Single case (m = 2, n = 16, k = 8) with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
12822
// Single case (m = 2, n = 16, k = 8) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
12836
// Fixes k = 8 and covers every (n, m) with n in [1, 16] and m in [1, 2], each
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12854
// Fixes n = 16 and k = 8 while m takes the values 1 and 2; iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(m_val).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12870
// Fixes m = 2 and k = 8 while n runs over [1, 16]; iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12886
// Runs m = 2, n = 16 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12901
// Runs m = 2, n = 16 for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12917
// Covers k in [1, 7], n in [1, 16] and m in [1, 2], each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12937
// Runs m = 2, n = 16 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12952
// Runs m = 2, n = 16 for every k in [9, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
12968
// Covers k in [9, 15], n in [1, 16] and m in [1, 2], each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12988
// Runs m = 2, n = 16 for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13003
// Runs m = 2, n = 16 for k = 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13019
// Sweeps k = 16, 24, ..., 80 against every (n, m) pair with n in [1, 16] and
// m in [1, 2]; each combination is run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13039
// Runs m = 2 with n in [17, 31] and k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13056
// Same sweep as n_gt_16 (n in [17, 31], k in {1, 10, 19, 28, 37}, m = 2), but
// with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13074
// Same sweep as n_gt_16 (n in [17, 31], k in {1, 10, 19, 28, 37}, m = 2), but
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13092
// Covers n in [17, 31], k in {1, 10, 19, 28, 37} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13112
// Runs m = 2 with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13129
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 2), but
// with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13147
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 2), but
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13165
// Covers n in {32, 48}, k in {1, 10, 19, 28, 37} and m in [1, 2], each with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13185
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, every n in [1, 16] and
// every m in [1, 2], runs the kernel with cm_stride overridden to 19 and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13206
// qmin: one run with m = 2, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13220
// qmax: one run with m = 2, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13234
// strided_cm: one run with m = 2, n = 16, k = 8 and cm_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13248 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13249
13250
13251 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: one run with m = 3, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13264
// strided_cn: one run with m = 3, n = 8, k = 8 and cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13278
// k_eq_8_strided_a: one run with m = 3, n = 8, k = 8 and a_stride overridden
// to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13292
// k_eq_8_subtile: for every n in [1, 8] and every m in [1, 3], runs the kernel
// at k = 8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13310
// k_eq_8_subtile_m: for every m in [1, 3], runs the kernel at n = 8, k = 8
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(8).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13326
// k_eq_8_subtile_n: for every n in [1, 8], runs the kernel at m = 3, k = 8
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13342
// k_lt_8: for every k in [1, 7], runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13357
// k_lt_8_strided_a: for every k in [1, 7], runs the kernel at m = 3, n = 8
// with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13373
// k_lt_8_subtile: for every k in [1, 7], n in [1, 8] and m in [1, 3], runs the
// kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13393
// k_gt_8: for every k in [9, 15], runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13408
// k_gt_8_strided_a: for every k in [9, 15], runs the kernel at m = 3, n = 8
// with a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13424
// k_gt_8_subtile: for every k in [9, 15], n in [1, 8] and m in [1, 3], runs
// the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13444
// k_div_8: for k = 16, 24, ..., 80, runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13459
// k_div_8_strided_a: for k = 16, 24, ..., 80, runs the kernel at m = 3, n = 8
// with a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13475
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in [1, 8] and every m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13495
// n_gt_8: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs the kernel
// at m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13512
// n_gt_8_strided_cn: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs
// the kernel at m = 3 with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13530
// n_gt_8_strided_a: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs
// the kernel at m = 3 with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13548
// n_gt_8_subtile: for every n in [9, 15], k in {1, 10, 19, 28, 37} and m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13568
// n_div_8: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the kernel at
// m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13585
// n_div_8_strided_cn: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the
// kernel at m = 3 with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13603
// n_div_8_strided_a: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the
// kernel at m = 3 with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13621
// n_div_8_subtile: for n in {16, 24}, k in {1, 10, 19, 28, 37} and every m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13641
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, every n in [1, 8] and
// every m in [1, 3], runs the kernel with cm_stride overridden to 11 and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13662
// qmin: one run with m = 3, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13676
// qmax: one run with m = 3, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13690
// strided_cm: one run with m = 3, n = 8, k = 8 and cm_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13705
13706
13707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: one run with m = 3, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13720
// strided_cn: one run with m = 3, n = 8, k = 8 and cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13734
// k_eq_8_strided_a: one run with m = 3, n = 8, k = 8 and a_stride overridden
// to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13748
// k_eq_8_subtile: for every n in [1, 8] and every m in [1, 3], runs the kernel
// at k = 8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13766
// k_eq_8_subtile_m: for every m in [1, 3], runs the kernel at n = 8, k = 8
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 3; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(8).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13782
// k_eq_8_subtile_n: for every n in [1, 8], runs the kernel at m = 3, k = 8
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13798
// k_lt_8: for every k in [1, 7], runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13813
// k_lt_8_strided_a: for every k in [1, 7], runs the kernel at m = 3, n = 8
// with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13829
// k_lt_8_subtile: for every k in [1, 7], n in [1, 8] and m in [1, 3], runs the
// kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13849
// k_gt_8: for every k in [9, 15], runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13864
// k_gt_8_strided_a: for every k in [9, 15], runs the kernel at m = 3, n = 8
// with a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13880
// k_gt_8_subtile: for every k in [9, 15], n in [1, 8] and m in [1, 3], runs
// the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13900
// k_div_8: for k = 16, 24, ..., 80, runs the kernel at m = 3, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13915
// k_div_8_strided_a: for k = 16, 24, ..., 80, runs the kernel at m = 3, n = 8
// with a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13931
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in [1, 8] and every m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13951
// n_gt_8: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs the kernel
// at m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13968
// n_gt_8_strided_cn: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs
// the kernel at m = 3 with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13986
// n_gt_8_strided_a: for every n in [9, 15] and k in {1, 10, 19, 28, 37}, runs
// the kernel at m = 3 with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14004
// n_gt_8_subtile: for every n in [9, 15], k in {1, 10, 19, 28, 37} and m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14024
// n_div_8: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the kernel at
// m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14041
// n_div_8_strided_cn: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the
// kernel at m = 3 with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14059
// n_div_8_strided_a: for n in {16, 24} and k in {1, 10, 19, 28, 37}, runs the
// kernel at m = 3 with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14077
// n_div_8_subtile: for n in {16, 24}, k in {1, 10, 19, 28, 37} and every m in
// [1, 3], runs the kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14097
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, every n in [1, 8] and
// every m in [1, 3], runs the kernel with cm_stride overridden to 11 and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14118
// qmin: one run with m = 3, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14132
// qmax: one run with m = 3, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14146
// strided_cm: one run with m = 3, n = 8, k = 8 and cm_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14161
14162
14163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: one run with m = 4, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14176
// strided_cn: one run with m = 4, n = 8, k = 8 and cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14190
// k_eq_8_strided_a: one run with m = 4, n = 8, k = 8 and a_stride overridden
// to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14204
// Every sub-tile (n in 1..8 outer, m in 1..4 inner) at k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(8).iterations(1).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14222
// Row sub-tiles m in 1..4 with n = 8 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(8).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14238
// Column sub-tiles n in 1..8 with m = 4 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14254
// Full 4x8 tile for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14269
// Full 4x8 tile for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(11).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14285
// Every sub-tile (n in 1..8, m in 1..4) for each k in 1..7, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14305
// Full 4x8 tile for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14320
// Full 4x8 tile for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(19).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14336
// Every sub-tile (n in 1..8, m in 1..4) for each k in 9..15, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14356
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14371
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(83).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14387
// Every sub-tile (n in 1..8, m in 1..4) for k = 16, 24, ..., 80, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14407
// m = 4 with n in 9..15, and k = 1, 10, ..., 37 (step 9) for each n.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14424
// m = 4 with n in 9..15 and k = 1, 10, ..., 37, using cn_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).cn_stride(11).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14442
// m = 4 with n in 9..15 and k = 1, 10, ..., 37, using a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).a_stride(43).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14460
// n in 9..15, k = 1, 10, ..., 37, and m in 1..4 innermost, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14480
// m = 4 with n = 16 and 24, and k = 1, 10, ..., 37 (step 9) for each n.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14497
// m = 4 with n = 16 and 24 and k = 1, 10, ..., 37, using cn_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).cn_stride(11).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14515
// m = 4 with n = 16 and 24 and k = 1, 10, ..., 37, using a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).a_stride(43).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14533
// n = 16 and 24, k = 1, 10, ..., 37, and m in 1..4 innermost, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14553
// k = 1, 10, ..., 37 with every sub-tile (n in 1..8, m in 1..4),
// using cm_stride of 11 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).cm_stride(11).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14574
// Full 4x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).qmin(128).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14588
// Full 4x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).qmax(128).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14602
// Full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).cm_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14616 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14617
14618
14619 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 4x8 tile at k = 8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14632
// Full 4x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).cn_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14646
// Full 4x8 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).a_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14660
// Every sub-tile (n in 1..8 outer, m in 1..4 inner) at k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(8).iterations(1).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14678
// Row sub-tiles m in 1..4 with n = 8 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(8).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14694
// Column sub-tiles n in 1..8 with m = 4 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14710
// Full 4x8 tile for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14725
// Full 4x8 tile for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(11).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14741
// Every sub-tile (n in 1..8, m in 1..4) for each k in 1..7, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14761
// Full 4x8 tile for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14776
// Full 4x8 tile for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(19).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14792
// Every sub-tile (n in 1..8, m in 1..4) for each k in 9..15, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14812
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14827
// Full 4x8 tile for k = 16, 24, ..., 80 (step 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).a_stride(83).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14843
// Every sub-tile (n in 1..8, m in 1..4) for k = 16, 24, ..., 80, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14863
// m = 4 with n in 9..15, and k = 1, 10, ..., 37 (step 9) for each n.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14880
// m = 4 with n in 9..15 and k = 1, 10, ..., 37, using cn_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).cn_stride(11).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14898
// m = 4 with n in 9..15 and k = 1, 10, ..., 37, using a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).a_stride(43).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14916
// n in 9..15, k = 1, 10, ..., 37, and m in 1..4 innermost, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14936
// m = 4 with n = 16 and 24, and k = 1, 10, ..., 37 (step 9) for each n.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14953
// m = 4 with n = 16 and 24 and k = 1, 10, ..., 37, using cn_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).cn_stride(11).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14971
// m = 4 with n = 16 and 24 and k = 1, 10, ..., 37, using a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(depth).a_stride(43).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14989
// n = 16 and 24, k = 1, 10, ..., 37, and m in 1..4 innermost, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15009
// k = 1, 10, ..., 37 with every sub-tile (n in 1..8, m in 1..4),
// using cm_stride of 11 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).cm_stride(11).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15030
// Full 4x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).qmin(128).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15044
// Full 4x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).qmax(128).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15058
// Full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).cm_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15072 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15073
15074
15075 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 6x8 tile at k = 8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15088
// Full 6x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).cn_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15102
// Full 6x8 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).a_stride(11).Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15116
// Every sub-tile (n in 1..8 outer, m in 1..6 inner) at k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(rows).n(cols).k(8).iterations(1).Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15134
// Row sub-tiles m in 1..6 with n = 8 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(rows).n(8).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15150
// Column sub-tiles n in 1..8 with m = 6 and k = 8, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(cols).k(8).iterations(1).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15166
// Full 6x8 tile for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(depth).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15181
// Full 6x8 tile for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(depth).a_stride(11).Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15197
// Every sub-tile (n in 1..8, m in 1..6) for each k in 1..7, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1).Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15217
// Runs the 6x8 neon_mlal_lane kernel on a full 6x8 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15232
// Same sweep as k_gt_8 (k in [9, 15], full 6x8 tile) but with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15248
// Sweeps k in [9, 15], n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15268
// Runs the 6x8 neon_mlal_lane kernel on a full 6x8 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15283
// Same sweep as k_div_8 (k = 16, 24, ..., 80, full 6x8 tile) but with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15299
// Sweeps k = 16, 24, ..., 80, n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15319
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15336
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15354
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15372
// Sweeps n in [9, 15], k = 1, 10, ..., 37 and m in [1, 6] (n outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15392
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15409
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15427
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15445
// Sweeps n = 16 and 24, k = 1, 10, ..., 37 and m in [1, 6] (n outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15465
// Sweeps k = 1, 10, ..., 37, n in [1, 8] and m in [1, 6] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15486
// Runs the full 6x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15500
// Runs the full 6x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15514
// Runs the full 6x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15528 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15529
15530
15531 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline shape for the 6x8 neon_mlal_lane_prfm kernel: m = 6, n = 8, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15544
// Runs the full 6x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15558
// Runs the full 6x8 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15572
// Sweeps n in [1, 8] and m in [1, 6] at k = 8 (n outer, m inner), one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15590
// Sweeps m in [1, 6] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(8).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15606
// Sweeps n in [1, 8] with m = 6 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15622
// Runs the 6x8 neon_mlal_lane_prfm kernel on a full 6x8 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15637
// Same sweep as k_lt_8 (k in [1, 7], full 6x8 tile) but with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15653
// Sweeps k in [1, 7], n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15673
// Runs the 6x8 neon_mlal_lane_prfm kernel on a full 6x8 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15688
// Same sweep as k_gt_8 (k in [9, 15], full 6x8 tile) but with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15704
// Sweeps k in [9, 15], n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15724
// Runs the 6x8 neon_mlal_lane_prfm kernel on a full 6x8 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15739
// Same sweep as k_div_8 (k = 16, 24, ..., 80, full 6x8 tile) but with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15755
// Sweeps k = 16, 24, ..., 80, n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15775
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15792
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15810
// Sweeps n in [9, 15] with k = 1, 10, ..., 37 and m = 6, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15828
// Sweeps n in [9, 15], k = 1, 10, ..., 37 and m in [1, 6] (n outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15848
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15865
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15883
// Sweeps n = 16 and 24 with k = 1, 10, ..., 37 and m = 6, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15901
// Sweeps n = 16 and 24, k = 1, 10, ..., 37 and m in [1, 6] (n outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15921
// Sweeps k = 1, 10, ..., 37, n in [1, 8] and m in [1, 6] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15942
// Runs the full 6x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15956
// Runs the full 6x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15970
// Runs the full 6x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15984 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15985
15986
15987 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline shape for the 6x8 neonv8_mlal_lane_prfm kernel: m = 6, n = 8, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16000
// Runs the full 6x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16014
// Runs the full 6x8 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16028
// Sweeps n in [1, 8] and m in [1, 6] at k = 8 (n outer, m inner), one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16046
// Sweeps m in [1, 6] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(8).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16062
// Sweeps n in [1, 8] with m = 6 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16078
// Runs the 6x8 neonv8_mlal_lane_prfm kernel on a full 6x8 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16093
// Same sweep as k_lt_8 (k in [1, 7], full 6x8 tile) but with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16109
// Sweeps k in [1, 7], n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16129
// Runs the 6x8 neonv8_mlal_lane_prfm kernel on a full 6x8 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16144
// Same sweep as k_gt_8 (k in [9, 15], full 6x8 tile) but with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16160
// Sweeps k in [9, 15], n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16180
// Runs the 6x8 neonv8_mlal_lane_prfm kernel on a full 6x8 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16195
// Same sweep as k_div_8 (k = 16, 24, ..., 80, full 6x8 tile) but with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1).m(6).n(8).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16211
// Sweeps k = 16, 24, ..., 80, n in [1, 8] and m in [1, 6] (k outermost, m innermost),
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1).m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel with m=6 for every n in
// [9, 15] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16248
// Same sweep as n_gt_8 (n in [9, 15], k = 1, 10, ..., 37, m=6), but with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16266
// Same sweep as n_gt_8 (n in [9, 15], k = 1, 10, ..., 37, m=6), but with
// a_stride(43) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16284
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel for every n in [9, 15],
// k = 1, 10, ..., 37 and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16304
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel with m=6 for n = 16 and 24
// and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16321
// Same sweep as n_div_8 (n = 16, 24; k = 1, 10, ..., 37; m=6), but with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16339
// Same sweep as n_div_8 (n = 16, 24; k = 1, 10, ..., 37; m=6), but with
// a_stride(43) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16357
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel for n = 16 and 24,
// k = 1, 10, ..., 37 and every m in [1, 6], with iterations(1) per case.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16377
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel with cm_stride(11) for
// k = 1, 10, ..., 37 and every n in [1, 8] and m in [1, 6], with
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16398
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel once with m=6, n=8, k=8 and
// qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16412
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel once with m=6, n=8, k=8 and
// qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16426
// Runs the 6x8 neonv8_mlal_lane_prfm ukernel once with m=6, n=8, k=8 and
// cm_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16440 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16441
16442
16443 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16456
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8 and
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16470
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8 and
// a_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16484
// Runs the 6x16 neonv8_mlal_lane ukernel at k=8 for every n in [1, 16] and
// m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16502
// Runs the 6x16 neonv8_mlal_lane ukernel at n=16, k=8 for every m in
// [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16518
// Runs the 6x16 neonv8_mlal_lane ukernel at m=6, k=8 for every n in
// [1, 16], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16534
// Runs the 6x16 neonv8_mlal_lane ukernel with m=6, n=16 for every k in
// [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16549
// Same sweep as k_lt_8 (k in [1, 7] with m=6, n=16), but with
// a_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16565
// Runs the 6x16 neonv8_mlal_lane ukernel for every k in [1, 7],
// n in [1, 16] and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16585
// Runs the 6x16 neonv8_mlal_lane ukernel with m=6, n=16 for every k in
// [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16600
// Same sweep as k_gt_8 (k in [9, 15] with m=6, n=16), but with
// a_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16616
// Runs the 6x16 neonv8_mlal_lane ukernel for every k in [9, 15],
// n in [1, 16] and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16636
// Runs the 6x16 neonv8_mlal_lane ukernel with m=6, n=16 for
// k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16651
// Same sweep as k_div_8 (k = 16, 24, ..., 80 with m=6, n=16), but with
// a_stride(83) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16667
// Runs the 6x16 neonv8_mlal_lane ukernel for k = 16, 24, ..., 80 and every
// n in [1, 16] and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16687
// Runs the 6x16 neonv8_mlal_lane ukernel with m=6 for every n in [17, 31]
// and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16704
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, ..., 37, m=6), but with
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16722
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, ..., 37, m=6), but with
// a_stride(43) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16740
// Runs the 6x16 neonv8_mlal_lane ukernel for every n in [17, 31],
// k = 1, 10, ..., 37 and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16760
// Runs the 6x16 neonv8_mlal_lane ukernel with m=6 for n = 32 and 48 and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16777
// Same sweep as n_div_16 (n = 32, 48; k = 1, 10, ..., 37; m=6), but with
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16795
// Same sweep as n_div_16 (n = 32, 48; k = 1, 10, ..., 37; m=6), but with
// a_stride(43) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16813
// Runs the 6x16 neonv8_mlal_lane ukernel for n = 32 and 48,
// k = 1, 10, ..., 37 and every m in [1, 6], with iterations(1) per case.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16833
// Runs the 6x16 neonv8_mlal_lane ukernel with cm_stride(19) for
// k = 1, 10, ..., 37 and every n in [1, 16] and m in [1, 6], with
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16854
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8 and
// qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16868
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8 and
// qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16882
// Runs the 6x16 neonv8_mlal_lane ukernel once with m=6, n=16, k=8 and
// cm_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16897
16898
16899 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the 6x16c4 neondot ukernel once with m=6, n=16, k=8.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16912
// Runs the 6x16c4 neondot ukernel once with m=6, n=16, k=8 and
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16926
// Runs the 6x16c4 neondot ukernel once with m=6, n=16, k=8 and
// a_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16940
// Runs the 6x16c4 neondot ukernel at k=8 for every n in [1, 16] and
// m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16958
// Runs the 6x16c4 neondot ukernel at n=16, k=8 for every m in [1, 6],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16974
// Runs the 6x16c4 neondot ukernel at m=6, k=8 for every n in [1, 16],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16990
// Runs the 6x16c4 neondot ukernel with m=6, n=16 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17005
// Same sweep as k_lt_8 (k in [1, 7] with m=6, n=16), but with
// a_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17021
// Runs the 6x16c4 neondot ukernel for every k in [1, 7], n in [1, 16] and
// m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17041
// Runs the 6x16c4 neondot ukernel with m=6, n=16 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17056
// Same sweep as k_gt_8 (k in [9, 15] with m=6, n=16), but with
// a_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17072
// Runs the 6x16c4 neondot ukernel for every k in [9, 15], n in [1, 16] and
// m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17092
// Runs the 6x16c4 neondot ukernel with m=6, n=16 for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17107
// Same sweep as k_div_8 (k = 16, 24, ..., 80 with m=6, n=16), but with
// a_stride(83) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17123
// Runs the 6x16c4 neondot ukernel for k = 16, 24, ..., 80 and every
// n in [1, 16] and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17143
// Runs the 6x16c4 neondot ukernel with m=6 for every n in [17, 31] and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17160
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, ..., 37, m=6), but with
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17178
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, ..., 37, m=6), but with
// a_stride(43) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17196
// Runs the 6x16c4 neondot ukernel for every n in [17, 31],
// k = 1, 10, ..., 37 and m in [1, 6], with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17216
// Runs the 6x16c4 neondot ukernel with m=6 for n = 32 and 48 and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17233
// Same sweep as n_div_16 (n = 32, 48; k = 1, 10, ..., 37; m=6), but with
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17251
// Varies n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 6, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17269
// Varies n over {32, 48}, k over {1, 10, 19, 28, 37} and m over 1..6; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17289
// Varies k over {1, 10, 19, 28, 37}, n over 1..16 and m over 1..6 with cm_stride set to 19;
// one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17310
// Single run at m = 6, n = 16, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(16).kr(4).sr(1);
  tester.m(6).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17324
// Single run at m = 6, n = 16, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(16).kr(4).sr(1);
  tester.m(6).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17338
// Single run at m = 6, n = 16, k = 8 with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(16).kr(4).sr(1);
  tester.m(6).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17352 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
17353
17354
17355 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17368
// Single run at m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17382
// Single run at m = 4, n = 4, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17396
// For k = 8, covers every (m, n) with 1 <= m <= 4 and 1 <= n <= 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17414
// For k = 8 and n = 4, varies m from 1 to 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17430
// For k = 8 and m = 4, varies n from 1 to 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17446
// Varies k from 1 to 7 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17461
// Varies k from 1 to 7 at m = 4, n = 4, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17477
// Varies k over 1..7, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17497
// Varies k from 9 to 15 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17512
// Varies k from 9 to 15 at m = 4, n = 4, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17528
// Varies k over 9..15, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17548
// Varies k over 16, 24, ..., 80 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17563
// Varies k over 16, 24, ..., 80 at m = 4, n = 4, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
17579
// Varies k over 16, 24, ..., 80, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17599
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17616
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17634
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17652
// Varies n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17689
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17707
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17725
// Varies n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17745
// Varies k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..4 with cm_stride set to 7;
// one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17766
// Single run at m = 4, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17780
// Single run at m = 4, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17794
// Single run at m = 4, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17809
17810
17811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17824
// Single run at m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17838
// Single run at m = 4, n = 4, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17852
// For k = 8, covers every (m, n) with 1 <= m <= 4 and 1 <= n <= 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17870
// For k = 8 and n = 4, varies m from 1 to 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17886
// For k = 8 and m = 4, varies n from 1 to 4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17902
// Varies k from 1 to 7 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17917
// Varies k from 1 to 7 at m = 4, n = 4, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17933
// Varies k over 1..7, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17953
// Varies k from 9 to 15 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17968
// Varies k from 9 to 15 at m = 4, n = 4, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17984
// Varies k over 9..15, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18004
// Varies k over 16, 24, ..., 80 at m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18019
// Varies k over 16, 24, ..., 80 at m = 4, n = 4, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18035
// Varies k over 16, 24, ..., 80, n over 1..4 and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18055
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18072
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18090
// Varies n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18108
// Varies n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18128
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18145
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18163
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18181
// Varies n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..4; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18201
// Varies k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..4 with cm_stride set to 7;
// one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18222
// Single run at m = 4, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18236
// Single run at m = 4, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18250
// Single run at m = 4, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18264 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18265
18266
18267 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18280
// Single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18294
// 2x4c2 AVX LD128 kernel: m = 2, n = 4, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18308
// 2x4c2 AVX LD128 kernel: every n in [1, 4] and m in [1, 2] at k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18326
// 2x4c2 AVX LD128 kernel: every m in [1, 2] with n = 4 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18342
// 2x4c2 AVX LD128 kernel: every n in [1, 4] with m = 2 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18358
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18373
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for every k in [1, 7],
// with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18389
// 2x4c2 AVX LD128 kernel: every k in [1, 7], n in [1, 4], m in [1, 2],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18409
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18424
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for every k in [9, 15],
// with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18440
// 2x4c2 AVX LD128 kernel: every k in [9, 15], n in [1, 4], m in [1, 2],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18460
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18475
// 2x4c2 AVX LD128 kernel: m = 2, n = 4 for k = 16, 24, ..., 80,
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18491
// 2x4c2 AVX LD128 kernel: k = 16, 24, ..., 80 crossed with n in [1, 4] and
// m in [1, 2], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18511
// 2x4c2 AVX LD128 kernel: m = 2, n in [5, 7], k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18528
// 2x4c2 AVX LD128 kernel: m = 2, n in [5, 7], k = 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18546
// 2x4c2 AVX LD128 kernel: m = 2, n in [5, 7], k = 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18564
// 2x4c2 AVX LD128 kernel: n in [5, 7], k = 1, 10, ..., 37, m in [1, 2],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18584
// 2x4c2 AVX LD128 kernel: m = 2, n = 8 and 12, k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18601
// 2x4c2 AVX LD128 kernel: m = 2, n = 8 and 12, k = 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18619
// 2x4c2 AVX LD128 kernel: m = 2, n = 8 and 12, k = 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18637
// 2x4c2 AVX LD128 kernel: n = 8 and 12, k = 1, 10, ..., 37, m in [1, 2],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18657
// 2x4c2 AVX LD128 kernel: k = 1, 10, ..., 37, n in [1, 4], m in [1, 2],
// with cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18678
// 2x4c2 AVX LD128 kernel: m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18692
// 2x4c2 AVX LD128 kernel: m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18706
// 2x4c2 AVX LD128 kernel: m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18720 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18721
18722
18723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 AVX LD128 kernel: m = mr = 3, n = nr = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18736
// 3x4c2 AVX LD128 kernel: m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18750
// 3x4c2 AVX LD128 kernel: m = 3, n = 4, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18764
// 3x4c2 AVX LD128 kernel: every n in [1, 4] and m in [1, 3] at k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18782
// 3x4c2 AVX LD128 kernel: every m in [1, 3] with n = 4 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18798
// 3x4c2 AVX LD128 kernel: every n in [1, 4] with m = 3 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18814
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18829
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for every k in [1, 7],
// with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18845
// 3x4c2 AVX LD128 kernel: every k in [1, 7], n in [1, 4], m in [1, 3],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18865
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18880
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for every k in [9, 15],
// with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18896
// 3x4c2 AVX LD128 kernel: every k in [9, 15], n in [1, 4], m in [1, 3],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18916
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18931
// 3x4c2 AVX LD128 kernel: m = 3, n = 4 for k = 16, 24, ..., 80,
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18947
// 3x4c2 AVX LD128 kernel: k = 16, 24, ..., 80 crossed with n in [1, 4] and
// m in [1, 3], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18967
// 3x4c2 AVX LD128 kernel: m = 3, n in [5, 7], k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18984
// 3x4c2 AVX LD128 kernel: m = 3, n in [5, 7], k = 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19002
// 3x4c2 AVX LD128 kernel: m = 3, n in [5, 7], k = 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19020
// 3x4c2 AVX LD128 kernel: n in [5, 7], k = 1, 10, ..., 37, m in [1, 3],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19040
// 3x4c2 AVX LD128 kernel: m = 3, n = 8 and 12, k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19057
// 3x4c2 AVX LD128 kernel: m = 3, n = 8 and 12, k = 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19075
// 3x4c2 AVX LD128 kernel: m = 3, n = 8 and 12, k = 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19093
// 3x4c2 AVX LD128 kernel: n = 8 and 12, k = 1, 10, ..., 37, m in [1, 3],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19113
// 3x4c2 AVX LD128 kernel: k = 1, 10, ..., 37, n in [1, 4], m in [1, 3],
// with cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19134
// 3x4c2 AVX LD128 kernel: m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19148
// 3x4c2 AVX LD128 kernel: m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19162
// 3x4c2 AVX LD128 kernel: m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19176 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19177
19178
19179 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 AVX LD128 kernel: m = mr = 4, n = nr = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19192
// 4x4c2 AVX LD128 kernel: m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19206
// 4x4c2 AVX LD128 kernel: m = 4, n = 4, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19220
// 4x4c2 AVX LD128 kernel: every n in [1, 4] and m in [1, 4] at k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19238
// 4x4c2 AVX LD128 kernel: every m in [1, 4] with n = 4 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19254
// 4x4c2 AVX LD128 kernel: every n in [1, 4] with m = 4 and k = 8,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19270
// 4x4c2 AVX LD128 kernel: m = 4, n = 4 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19285
// 4x4c2 AVX LD128 kernel: m = 4, n = 4 for every k in [1, 7],
// with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19301
// 4x4c2 AVX LD128 kernel: every k in [1, 7], n in [1, 4], m in [1, 4],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19321
// 4x4c2 AVX LD128 kernel: m = 4, n = 4 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19336
// 4x4c2 AVX LD128 kernel: m = 4, n = 4 for every k in [9, 15],
// with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19352
// Sweeps k over [9, 16) and every (m, n) with 1 <= m <= 4, 1 <= n <= 4,
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19372
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (4), n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19387
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (4), n == nr (4),
// using a_stride(83), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19403
// Sweeps k over the multiples of 8 in [16, 80] and every (m, n) with
// 1 <= m <= 4, 1 <= n <= 4, running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19423
// Sweeps n over [5, 8) (above nr == 4) and k over {1, 10, 19, 28, 37}
// with m == mr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19440
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (4),
// using cn_stride(7), which is larger than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19458
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (4),
// using a_stride(43), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19476
// Sweeps n over [5, 8), k over {1, 10, 19, 28, 37} and m over [1, 4],
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19496
// Sweeps n over {8, 12} (multiples of nr == 4) and k over {1, 10, 19, 28, 37}
// with m == mr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19513
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m == mr (4),
// using cn_stride(7), which is larger than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19531
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m == mr (4),
// using a_stride(43), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19549
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 4],
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19569
// Sweeps k over {1, 10, 19, 28, 37} and every (m, n) with 1 <= m <= 4,
// 1 <= n <= 4, using cm_stride(7) (larger than nr == 4) and a single
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19590
// Runs m == mr (4), n == nr (4), k == 8 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19604
// Runs m == mr (4), n == nr (4), k == 8 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19618
// Runs m == mr (4), n == nr (4), k == 8 with cm_stride(7), which is larger
// than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19632 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19633
19634
19635 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs m == mr (2), n == nr (4) and k == 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19648
// Runs m == mr (2), n == nr (4), k == 8 with cn_stride(7), which is larger
// than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19662
// Runs m == mr (2), n == nr (4), k == 8 with a_stride(11), which is larger
// than k.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19676
// Runs k == 8 for every (m, n) with 1 <= m <= 2, 1 <= n <= 4, using a single
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19694
// Runs k == 8, n == nr (4) for every m in [1, 2], using a single iteration
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19710
// Runs k == 8, m == mr (2) for every n in [1, 4], using a single iteration
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19726
// Sweeps k over [1, 8) with m == mr (2) and n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19741
// Sweeps k over [1, 8) with m == mr (2) and n == nr (4), using a_stride(11),
// which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19757
// Sweeps k over [1, 8) and every (m, n) with 1 <= m <= 2, 1 <= n <= 4,
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19777
// Sweeps k over [9, 16) with m == mr (2) and n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19792
// Sweeps k over [9, 16) with m == mr (2) and n == nr (4), using a_stride(19),
// which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19808
// Sweeps k over [9, 16) and every (m, n) with 1 <= m <= 2, 1 <= n <= 4,
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19828
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (2), n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19843
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (2), n == nr (4),
// using a_stride(83), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19859
// Sweeps k over the multiples of 8 in [16, 80] and every (m, n) with
// 1 <= m <= 2, 1 <= n <= 4, running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19879
// Sweeps n over [5, 8) (above nr == 4) and k over {1, 10, 19, 28, 37}
// with m == mr (2).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19896
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (2),
// using cn_stride(7), which is larger than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19914
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (2),
// using a_stride(43), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19932
// Sweeps n over [5, 8), k over {1, 10, 19, 28, 37} and m over [1, 2],
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19952
// Sweeps n over {8, 12} (multiples of nr == 4) and k over {1, 10, 19, 28, 37}
// with m == mr (2).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19969
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m == mr (2),
// using cn_stride(7), which is larger than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19987
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m == mr (2),
// using a_stride(43), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20005
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2],
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20025
// Sweeps k over {1, 10, 19, 28, 37} and every (m, n) with 1 <= m <= 2,
// 1 <= n <= 4, using cm_stride(7) (larger than nr == 4) and a single
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20046
// Runs m == mr (2), n == nr (4), k == 8 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20060
// Runs m == mr (2), n == nr (4), k == 8 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20074
// Runs m == mr (2), n == nr (4), k == 8 with cm_stride(7), which is larger
// than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20088 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20089
20090
20091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs m == mr (3), n == nr (4) and k == 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20104
// Runs m == mr (3), n == nr (4), k == 8 with cn_stride(7), which is larger
// than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20118
// Runs m == mr (3), n == nr (4), k == 8 with a_stride(11), which is larger
// than k.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20132
// Runs k == 8 for every (m, n) with 1 <= m <= 3, 1 <= n <= 4, using a single
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20150
// Runs k == 8, n == nr (4) for every m in [1, 3], using a single iteration
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20166
// Runs k == 8, m == mr (3) for every n in [1, 4], using a single iteration
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20182
// Sweeps k over [1, 8) with m == mr (3) and n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20197
// Sweeps k over [1, 8) with m == mr (3) and n == nr (4), using a_stride(11),
// which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20213
// Sweeps k over [1, 8) and every (m, n) with 1 <= m <= 3, 1 <= n <= 4,
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20233
// Sweeps k over [9, 16) with m == mr (3) and n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20248
// Sweeps k over [9, 16) with m == mr (3) and n == nr (4), using a_stride(19),
// which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20264
// Sweeps k over [9, 16) and every (m, n) with 1 <= m <= 3, 1 <= n <= 4,
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20284
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (3), n == nr (4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20299
// Sweeps k over the multiples of 8 in [16, 80] with m == mr (3), n == nr (4),
// using a_stride(83), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20315
// Sweeps k over the multiples of 8 in [16, 80] and every (m, n) with
// 1 <= m <= 3, 1 <= n <= 4, running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20335
// Sweeps n over [5, 8) (above nr == 4) and k over {1, 10, 19, 28, 37}
// with m == mr (3).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20352
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (3),
// using cn_stride(7), which is larger than nr (4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20370
// Sweeps n over [5, 8) and k over {1, 10, 19, 28, 37} with m == mr (3),
// using a_stride(43), which is larger than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20388
// Sweeps n over [5, 8), k over {1, 10, 19, 28, 37} and m over [1, 3],
// running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20408
// n = 8 and 12 (multiples of nr = 4) at m = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20425
// n = 8 and 12 at m = 3, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20443
// n = 8 and 12 at m = 3, k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20461
// n = 8 and 12, k = 1, 10, ..., 37, and every m in 1..3; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20481
// Every (m, n) in 1..3 x 1..4 for k = 1, 10, ..., 37, with cm_stride set to 7 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20502
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20516
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20530
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20544 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20545
20546
20547 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 4x4 tile (m = 4, n = 4) at k = 8, with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20560
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20574
// Full 4x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20588
// Every (m, n) in 1..4 x 1..4 at k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 5; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20606
// Varies only m over 1..4 with n = 4 and k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows < 5; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20622
// Varies only n over 1..4 with m = 4 and k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20638
// Full 4x4 tile for every k in 1..7 (below 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20653
// Full 4x4 tile for every k in 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20669
// Every k in 1..7 combined with every (m, n) in 1..4 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20689
// Full 4x4 tile for every k in 9..15 (above 8, below 16).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20704
// Full 4x4 tile for every k in 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20720
// Every k in 9..15 combined with every (m, n) in 1..4 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20740
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20755
// Full 4x4 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20771
// k = 16, 24, ..., 80 combined with every (m, n) in 1..4 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20791
// Full-height tile (m = 4) with n = 5, 6, 7 (above nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20808
// n = 5..7 and k = 1, 10, ..., 37 at m = 4, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20826
// n = 5..7 and k = 1, 10, ..., 37 at m = 4, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20844
// n = 5..7, k = 1, 10, ..., 37, and every m in 1..4; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20864
// n = 8 and 12 (multiples of nr = 4) at m = 4, for k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20881
// n = 8 and 12 at m = 4, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20899
// n = 8 and 12 at m = 4, k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20917
// n = 8 and 12, k = 1, 10, ..., 37, and every m in 1..4; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20937
// Every (m, n) in 1..4 x 1..4 for k = 1, 10, ..., 37, with cm_stride set to 7 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20958
// Full 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20972
// Full 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20986
// Full 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21000 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21001
21002
21003 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 2x4 tile (m = 2, n = 4) at k = 8, with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21016
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21030
// Full 2x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21044
// Every (m, n) in 1..2 x 1..4 at k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21062
// Varies only m over 1..2 with n = 4 and k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21078
// Varies only n over 1..4 with m = 2 and k = 8; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21094
// Full 2x4 tile for every k in 1..7 (below 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21109
// Full 2x4 tile for every k in 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21125
// Every k in 1..7 combined with every (m, n) in 1..2 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21145
// Full 2x4 tile for every k in 9..15 (above 8, below 16).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21160
// Full 2x4 tile for every k in 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21176
// Every k in 9..15 combined with every (m, n) in 1..2 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21196
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21211
// Full 2x4 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21227
// k = 16, 24, ..., 80 combined with every (m, n) in 1..2 x 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21247
// Full-height tile (m = 2) with n = 5, 6, 7 (above nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21264
// n = 5..7 and k = 1, 10, ..., 37 at m = 2, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21282
// n = 5..7 and k = 1, 10, ..., 37 at m = 2, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21300
// n = 5..7, k = 1, 10, ..., 37, and every m in 1..2; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21320
// n = 8 and 12 (multiples of nr = 4) at m = 2, for k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21337
// n = 8 and 12 at m = 2, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21355
// n = 8 and 12 at m = 2, k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21373
// n = 8 and 12, k = 1, 10, ..., 37, and every m in 1..2; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21393
// Every (m, n) in 1..2 x 1..4 for k = 1, 10, ..., 37, with cm_stride set to 7 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21414
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21428
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21442
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21456 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21457
21458
21459 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m == mr == 4, n == nr == 4, k == 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21472
// m == 4, n == 4, k == 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21486
// m == 4, n == 4, k == 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21500
// k == 8 for every (n, m) pair with n in [1, 4] and m in [1, 4];
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21518
// k == 8, n == 4, m swept over [1, 4]; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21534
// k == 8, m == 4, n swept over [1, 4]; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21550
// m == 4, n == 4, k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21565
// m == 4, n == 4, k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21581
// k in [1, 7], n in [1, 4], m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21601
// m == 4, n == 4, k swept over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21616
// m == 4, n == 4, k swept over [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21632
// k in [9, 15], n in [1, 4], m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21652
// m == 4, n == 4, k stepped by 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21667
// m == 4, n == 4, k stepped by 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21683
// k stepped by 8 from 16 through 80, n in [1, 4], m in [1, 4];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21703
// m == 4, n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21720
// m == 4, n in [5, 7], k in {1, 10, 19, 28, 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21738
// m == 4, n in [5, 7], k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21756
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 4];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21776
// m == 4, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21793
// m == 4, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21811
// m == 4, n in {8, 12}, k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21829
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 4];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21849
// cm_stride set to 7 for k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 4];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21870
// m == 4, n == 4, k == 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21884
// m == 4, n == 4, k == 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21898
// m == 4, n == 4, k == 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21912 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21913
21914
21915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m == mr == 2, n == nr == 4, k == 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21928
// m == 2, n == 4, k == 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21942
// m == 2, n == 4, k == 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21956
// k == 8 for every (n, m) pair with n in [1, 4] and m in [1, 2];
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21974
// k == 8, n == 4, m swept over [1, 2]; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21990
// k == 8, m == 2, n swept over [1, 4]; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22006
// m == 2, n == 4, k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22021
// m == 2, n == 4, k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22037
// k in [1, 7], n in [1, 4], m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22057
// m == 2, n == 4, k swept over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22072
// m == 2, n == 4, k swept over [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22088
// k in [9, 15], n in [1, 4], m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22108
// m == 2, n == 4, k stepped by 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22123
// m == 2, n == 4, k stepped by 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22139
// k stepped by 8 from 16 through 80, n in [1, 4], m in [1, 2];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22159
// m == 2, n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22176
// m == 2, n in [5, 7], k in {1, 10, 19, 28, 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22194
// m == 2, n in [5, 7], k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22212
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 2];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22232
// m == 2, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22249
// m == 2, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22267
// m == 2, n in {8, 12}, k in {1, 10, 19, 28, 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22285
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 2];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22305
// cm_stride set to 7 for k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2];
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22326
// m == 2, n == 4, k == 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22340
// m == 2, n == 4, k == 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22354
// m == 2, n == 4, k == 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22368 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22369
22370
22371 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m == mr == 4, n == nr == 4, k == 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22384
// m == 4, n == 4, k == 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22398
// m == 4, n == 4, k == 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22412
// k == 8 for every (n, m) pair with n in [1, 4] and m in [1, 4];
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22430
// k == 8, n == 4, m swept over [1, 4]; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22446
// k == 8, m == 4, n swept over [1, 4]; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22462
// m == 4, n == 4, k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22477
// m == 4, n == 4, k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22493
// k_lt_8_subtile: k swept over 1..7; for each k, every (n, m) pair with
// n in 1..4 and m in 1..4 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22513
// k_gt_8: m=4, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22528
// k_gt_8_strided_a: m=4, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22544
// k_gt_8_subtile: k swept over 9..15; for each k, every (n, m) pair with
// n in 1..4 and m in 1..4 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22564
// k_div_8: m=4, n=4, with k stepping through 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22579
// k_div_8_strided_a: m=4, n=4, k stepping through 16, 24, ..., 80,
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22595
// k_div_8_subtile: k stepping through 16, 24, ..., 80; for each k, every
// (n, m) pair with n in 1..4 and m in 1..4 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22615
// n_gt_4: m=4, n swept over 5..7; for each n, k steps through 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22632
// n_gt_4_strided_cn: m=4, n swept over 5..7, k stepping through 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22650
// n_gt_4_strided_a: m=4, n swept over 5..7, k stepping through 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22668
// n_gt_4_subtile: n swept over 5..7, k stepping through 1, 10, ..., 37, and
// m swept over 1..4 in the innermost loop. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22688
// n_div_4: m=4, n taking the values 8 and 12; for each n, k steps through
// 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22705
// n_div_4_strided_cn: m=4, n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22723
// n_div_4_strided_a: m=4, n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22741
// n_div_4_subtile: n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, and m swept over 1..4 in the innermost loop.
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22761
// strided_cm_subtile: k stepping through 1, 10, ..., 37; for each k, every
// (n, m) pair with n in 1..4 and m in 1..4 is tested with cm_stride set to 7.
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22782
// qmin: single m=4, n=4, k=8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22796
// qmax: single m=4, n=4, k=8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22810
// strided_cm: single m=4, n=4, k=8 case with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22824 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22825
22826
22827 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m=3, n=4, k=8 case with no extra tester options.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22840
// strided_cn: single m=3, n=4, k=8 case with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22854
// k_eq_8_strided_a: single m=3, n=4, k=8 case with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22868
// k_eq_8_subtile: k=8 with every (n, m) pair, n in 1..4 (outer loop) and
// m in 1..3 (inner loop). iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22886
// k_eq_8_subtile_m: k=8, n=4, with m swept over 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22902
// k_eq_8_subtile_n: k=8, m=3, with n swept over 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22918
// k_lt_8: m=3, n=4, with k swept over 1..7 (every value below 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22933
// k_lt_8_strided_a: m=3, n=4, k swept over 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22949
// k_lt_8_subtile: k swept over 1..7; for each k, every (n, m) pair with
// n in 1..4 and m in 1..3 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22969
// k_gt_8: m=3, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22984
// k_gt_8_strided_a: m=3, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23000
// k_gt_8_subtile: k swept over 9..15; for each k, every (n, m) pair with
// n in 1..4 and m in 1..3 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23020
// k_div_8: m=3, n=4, with k stepping through 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23035
// k_div_8_strided_a: m=3, n=4, k stepping through 16, 24, ..., 80,
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23051
// k_div_8_subtile: k stepping through 16, 24, ..., 80; for each k, every
// (n, m) pair with n in 1..4 and m in 1..3 is tested. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// n_gt_4: m=3, n swept over 5..7; for each n, k steps through 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23088
// n_gt_4_strided_cn: m=3, n swept over 5..7, k stepping through 1, 10, ..., 37,
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23106
// n_gt_4_strided_a: m=3, n swept over 5..7, k stepping through 1, 10, ..., 37,
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23124
// n_gt_4_subtile: n swept over 5..7, k stepping through 1, 10, ..., 37, and
// m swept over 1..3 in the innermost loop. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23144
// n_div_4: m=3, n taking the values 8 and 12; for each n, k steps through
// 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23161
// n_div_4_strided_cn: m=3, n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23179
// n_div_4_strided_a: m=3, n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23197
// n_div_4_subtile: n taking the values 8 and 12, k stepping through
// 1, 10, ..., 37, and m swept over 1..3 in the innermost loop.
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23217
// strided_cm_subtile: k stepping through 1, 10, ..., 37; for each k, every
// (n, m) pair with n in 1..4 and m in 1..3 is tested with cm_stride set to 7.
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23238
// qmin: single m=3, n=4, k=8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23252
// qmax: single m=3, n=4, k=8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23266
// strided_cm: single m=3, n=4, k=8 case with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23280 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23281
23282
23283 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m=1, n=4, k=8 case with no extra tester options.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23296
// strided_cn: single m=1, n=4, k=8 case with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23310
// k_eq_8_strided_a: single m=1, n=4, k=8 case with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23324
// k_eq_8_subtile: k=8 with n swept over 1..4; the inner m loop covers only
// m=1. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23342
// k_eq_8_subtile_m: k=8, n=4; the m loop covers only m=1. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23358
// k_eq_8_subtile_n: k=8, m=1, with n swept over 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23374
// k_lt_8: m=1, n=4, with k swept over 1..7 (every value below 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23389
// k_lt_8_strided_a: m=1, n=4, k swept over 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23405
// k_lt_8_subtile: k swept over 1..7 and n over 1..4; the innermost m loop
// covers only m=1. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23425
// k_gt_8: m=1, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23440
// k_gt_8_strided_a: m=1, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23456
// k_gt_8_subtile: k swept over 9..15 and n over 1..4; the innermost m loop
// covers only m=1. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23476
// k_div_8: m=1, n=4, with k stepping through 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23491
// k_div_8_strided_a: m=1, n=4, k stepping through 16, 24, ..., 80,
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23507
// k_div_8_subtile: k stepping through 16, 24, ..., 80 and n over 1..4; the
// innermost m loop covers only m=1. iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23527
// n_gt_4: m=1, n swept over 5..7; for each n, k steps through 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23544
// n in [5, 7] with m = 1 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23562
// n in [5, 7] with m = 1 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23580
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23600
// n in {8, 12} (multiples of nr = 4) with m = 1; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23617
// n in {8, 12} with m = 1 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23635
// n in {8, 12} with m = 1 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23653
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23673
// k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1], all with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23694
// Single 1x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23708
// Single 1x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23722
// Single 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23737
23738
23739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 2x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23752
// Single full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23766
// Single full 2x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23780
// Every (m, n) with n in [1, 4] and m in [1, 2] at k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23798
// m in [1, 2] with n = 4 and k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23814
// n in [1, 4] with m = 2 and k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23830
// Full 2x4 tile; k in [1, 7] (below kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23845
// Full 2x4 tile; k in [1, 7] with a_stride fixed at 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23861
// k in [1, 7], n in [1, 4], m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23881
// Full 2x4 tile; k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23896
// Full 2x4 tile; k in [9, 15] with a_stride fixed at 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23912
// k in [9, 15], n in [1, 4], m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23932
// Full 2x4 tile; k runs over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23947
// Full 2x4 tile; k runs over 16, 24, ..., 80 with a_stride fixed at 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23963
// k in {16, 24, ..., 80}, n in [1, 4], m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23983
// n in [5, 7] (above nr = 4) with m = 2; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24000
// n in [5, 7] with m = 2 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24018
// n in [5, 7] with m = 2 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24036
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24056
// n in {8, 12} (multiples of nr = 4) with m = 2; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24073
// n in {8, 12} with m = 2 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24091
// n in {8, 12} with m = 2 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24109
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24129
// k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2], all with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24150
// Single 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24164
// Single 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24178
// Single 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24192 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24193
24194
24195 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 1x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24208
// Single full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24222
// Single full 1x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24236
// Every (m, n) with n in [1, 4] and m in [1, 1] at k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24254
// m in [1, 1] with n = 4 and k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24270
// n in [1, 4] with m = 1 and k = 8; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24286
// Full 1x4 tile; k in [1, 7] (below kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24301
// Full 1x4 tile; k in [1, 7] with a_stride fixed at 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24317
// k in [1, 7], n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24337
// Full 1x4 tile; k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24352
// Full 1x4 tile; k in [9, 15] with a_stride fixed at 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24368
// k in [9, 15], n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24388
// Full 1x4 tile; k runs over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24403
// Full 1x4 tile; k runs over 16, 24, ..., 80 with a_stride fixed at 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24419
// k in {16, 24, ..., 80}, n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24439
// n in [5, 7] (above nr = 4) with m = 1; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24456
// n in [5, 7] with m = 1 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24474
// n in [5, 7] with m = 1 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24492
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24512
// n in {8, 12} (multiples of nr = 4) with m = 1; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24529
// n in {8, 12} with m = 1 and cn_stride fixed at 7; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24547
// n in {8, 12} with m = 1 and a_stride fixed at 43; k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24565
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24585
// k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1], all with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24606
// Single case: m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24620
// Single case: m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24634
// Single case: m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24648 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24649
24650
24651 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m=2, n=4, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24664
// Single case: m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24678
// Single case: m=2, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24692
// Sweeps n over [1, 4] and m over [1, 2] at k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24710
// Sweeps m over [1, 2] at n=4, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24726
// Sweeps n over [1, 4] at m=2, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24742
// Sweeps k over [1, 7] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24757
// Sweeps k over [1, 7] at m=2, n=4 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24773
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24793
// Sweeps k over [9, 15] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24808
// Sweeps k over [9, 15] at m=2, n=4 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24824
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24844
// Sweeps k over 16, 24, ..., 80 (step 8) at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24859
// Sweeps k over 16, 24, ..., 80 (step 8) at m=2, n=4 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24875
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24895
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24912
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24930
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24948
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24968
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24985
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25003
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25021
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25041
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2],
// each case with cm_stride set to 7 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25062
// Single case: m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25076
// Single case: m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25090
// Single case: m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25105
25106
25107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m=2, n=4, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25120
// Single case: m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25134
// Single case: m=2, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25148
// Sweeps n over [1, 4] and m over [1, 2] at k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25166
// Sweeps m over [1, 2] at n=4, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25182
// Sweeps n over [1, 4] at m=2, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25198
// Sweeps k over [1, 7] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25213
// Sweeps k over [1, 7] at m=2, n=4 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25229
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25249
// Sweeps k over [9, 15] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25264
// Sweeps k over [9, 15] at m=2, n=4 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25280
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25300
// Sweeps k over 16, 24, ..., 80 (step 8) at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25315
// Sweeps k over 16, 24, ..., 80 (step 8) at m=2, n=4 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25331
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25351
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25368
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25386
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25404
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25424
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25441
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25459
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25477
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2],
// each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25497
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2],
// each case with cm_stride set to 7 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25518
// Single case: m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25532
// Single case: m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25546
// Single case: m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25560 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25561
25562
25563 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m=3, n=4, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25576
// Single case: m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25590
// Single case: m=3, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25604
// Sweeps n over [1, 4] and m over [1, 3] at k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25622
// Sweeps m over [1, 3] at n=4, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25638
// Sweeps n over [1, 4] at m=3, k=8, each case with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25654
// Sweeps k over [1, 7] at m=3, n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25669
// Sweeps k over [1, 7] on the full 3x4 tile with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25685
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25705
// Sweeps k over [9, 15] on the full 3x4 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25720
// Sweeps k over [9, 15] on the full 3x4 tile with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25736
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25756
// Sweeps k over 16, 24, ..., 80 on the full 3x4 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25771
// Sweeps k over 16, 24, ..., 80 on the full 3x4 tile with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25787
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 3], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25807
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25824
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 3 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25842
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 3 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25860
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 3], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25880
// Runs n = 8 and n = 12 against k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25897
// Runs n = 8 and n = 12 against k over 1, 10, ..., 37 with m = 3 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25915
// Runs n = 8 and n = 12 against k over 1, 10, ..., 37 with m = 3 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25933
// Runs n = 8 and n = 12 against k over 1, 10, ..., 37 and m over [1, 3], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25953
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3] with cm_stride set to 7
// and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25974
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25988
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26002
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26016 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26017
26018
26019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x8 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26032
// Full 2x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26046
// Full 2x8 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26060
// Sweeps n over [1, 8] and m over [1, 2] at k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26078
// Sweeps m over [1, 2] with n = 8 and k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m_dim).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26094
// Sweeps n over [1, 8] with m = 2 and k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n_dim).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26110
// Sweeps k over [1, 7] on the full 2x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26125
// Sweeps k over [1, 7] on the full 2x8 tile with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26141
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 2], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26161
// Sweeps k over [9, 15] on the full 2x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26176
// Sweeps k over [9, 15] on the full 2x8 tile with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26192
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 2], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26212
// Sweeps k over 16, 24, ..., 80 on the full 2x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26227
// Sweeps k over 16, 24, ..., 80 on the full 2x8 tile with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26243
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 2], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26263
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26280
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26298
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26316
// Sweeps n over [9, 15], k over 1, 10, ..., 37 and m over [1, 2], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26336
// Runs n = 16 and n = 24 against k over 1, 10, ..., 37 with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26353
// Runs n = 16 and n = 24 against k over 1, 10, ..., 37 with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26371
// Runs n = 16 and n = 24 against k over 1, 10, ..., 37 with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26389
// Runs n = 16 and n = 24 against k over 1, 10, ..., 37 and m over [1, 2], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26409
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 2] with cm_stride set to 11
// and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26430
// Full 2x8 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26444
// Full 2x8 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26458
// Full 2x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26472 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26473
26474
26475 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x8 tile at k = 8, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26489
// Full 1x8 tile at k = 8 with cn_stride set to 11, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26504
// Full 1x8 tile at k = 8 with a_stride set to 11, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
26519
// Sweeps n over [1, 8] at k = 8 with iterations(1); the m loop covers only m = 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
26538
// n = 8, k = 8 with iterations(1); the m loop covers only m = 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(m_dim).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26555
// Sweeps n over [1, 8] with m = 1 and k = 8, with iterations(1).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n_dim).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26572
// Sweeps k over [1, 7] on the full 1x8 tile, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26588
// Sweeps k over [1, 7] on the full 1x8 tile with a_stride set to 11, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26605
// Sweeps k over [1, 7] and n over [1, 8] with iterations(1); the m loop covers only m = 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26626
// Sweeps k over [9, 15] on the full 1x8 tile, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26642
// Sweeps k over [9, 15] on the full 1x8 tile with a_stride set to 19, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim).a_stride(19);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26659
// Sweeps k over [9, 15] and n over [1, 8] with iterations(1); the m loop covers only m = 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26680
// Sweeps k over 16, 24, ..., 80 on the full 1x8 tile, with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26696
// Sweeps k over 16, 24, ..., 80 on the full 1x8 tile with a_stride set to 83,
// with extended_weights(true).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim).a_stride(83);
    tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
26713
// Sweeps k over 16, 24, ..., 80 and n over [1, 8] with iterations(1); the m loop covers only m = 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.extended_weights(true).mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26734
// n ranges over 9..15 (above nr = 8, below 2 * nr) with m fixed at 1;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26752
// n in 9..15 and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// cn_stride set to 11 (a value different from nr = 8).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26771
// n in 9..15 and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// a_stride set to 43 (larger than every k tested).
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26790
// n in 9..15, k in {1, 10, 19, 28, 37}, and m in [1, 1]; iterations is set
// to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26811
// n takes the values 16 and 24 (multiples of nr = 8) with m fixed at 1;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26829
// n in {16, 24} and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26848
// n in {16, 24} and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// a_stride set to 43.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26867
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and m in [1, 1]; iterations is
// set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26888
// k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1], all with cm_stride
// set to 11 and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26910
// Single configuration: full 1x8 tile, k = 8, with cm_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .extended_weights(true)
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
26925 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26926
26927
26928 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: full 3x8 tile (m == mr, n == nr) with extended
// weights and k equal to kr = 8.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .extended_weights(true)
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
26942
// Single configuration: full 3x8 tile, k = 8, with cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .extended_weights(true)
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
26957
// Single configuration: full 3x8 tile, k = 8, with a_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .extended_weights(true)
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
26972
// With k fixed at 8, exercises every output shape with n in [1, 8] and
// m in [1, 3]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26991
// With k = 8 and n = 8 fixed, varies m over [1, 3]; iterations is set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27008
// With k = 8 and m = 3 fixed, varies n over [1, 8]; iterations is set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27025
// Full 3x8 tile with k swept over 1..7 (every value below kr = 8).
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27041
// Full 3x8 tile with k swept over 1..7 and a_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27058
// For each k in 1..7, exercises every output shape with n in [1, 8] and
// m in [1, 3]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27079
// Full 3x8 tile with k swept over 9..15 (above kr = 8, below 2 * kr).
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27095
// Full 3x8 tile with k swept over 9..15 and a_stride set to 19.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27112
// For each k in 9..15, exercises every output shape with n in [1, 8] and
// m in [1, 3]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27133
// Full 3x8 tile with k swept over 16, 24, ..., 80 (multiples of kr = 8).
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27149
// Full 3x8 tile with k swept over 16, 24, ..., 80 and a_stride set to 83
// (larger than the biggest k tested).
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .extended_weights(true)
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
27166
// For each k in 16, 24, ..., 80, exercises every output shape with
// n in [1, 8] and m in [1, 3]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27187
// n ranges over 9..15 (above nr = 8, below 2 * nr) with m fixed at 3;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27205
// n in 9..15 and k in {1, 10, 19, 28, 37} with m fixed at 3, using
// cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27224
// n in 9..15 and k in {1, 10, 19, 28, 37} with m fixed at 3, using
// a_stride set to 43.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27243
// n in 9..15, k in {1, 10, 19, 28, 37}, and m in [1, 3]; iterations is set
// to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27264
// n takes the values 16 and 24 (multiples of nr = 8) with m fixed at 3;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27282
// n in {16, 24} and k in {1, 10, 19, 28, 37} with m fixed at 3, using
// cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27301
// n in {16, 24} and k in {1, 10, 19, 28, 37} with m fixed at 3, using
// a_stride set to 43.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .extended_weights(true)
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27320
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and m in [1, 3]; iterations is
// set to 1 for each configuration.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27341
// k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 3], all with cm_stride
// set to 11 and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .extended_weights(true)
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27363
// Single configuration: full 3x8 tile, k = 8, with cm_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .extended_weights(true)
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
27378 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27379
27380
27381 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: full 1x16 tile (m == mr, n == nr) with k equal to
// kr = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
27394
// Single configuration: full 1x16 tile, k = 8, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
27408
// Single configuration: full 1x16 tile, k = 8, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
27422
// With k fixed at 8, exercises every output shape with n in [1, 16] and
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27440
// With k = 8 and n = 16 fixed, varies m over [1, 1]; iterations is set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27456
// With k = 8 and m = 1 fixed, varies n over [1, 16]; iterations is set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27472
// Full 1x16 tile with k swept over 1..7 (every value below kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27487
// Full 1x16 tile with k swept over 1..7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27503
// For each k in 1..7, exercises every output shape with n in [1, 16] and
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27523
// Full 1x16 tile with k swept over 9..15 (above kr = 8, below 2 * kr).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27538
// Full 1x16 tile with k swept over 9..15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27554
// For each k in 9..15, exercises every output shape with n in [1, 16] and
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27574
// Full 1x16 tile with k swept over 16, 24, ..., 80 (multiples of kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27589
// Full 1x16 tile with k swept over 16, 24, ..., 80 and a_stride set to 83
// (larger than the biggest k tested).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
27605
// For each k in 16, 24, ..., 80, exercises every output shape with
// n in [1, 16] and m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27625
// n ranges over 17..31 (above nr = 16, below 2 * nr) with m fixed at 1;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27642
// n in 17..31 and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// cn_stride set to 19 (a value different from nr = 16).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27660
// n in 17..31 and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// a_stride set to 43 (larger than every k tested).
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27678
// n in 17..31, k in {1, 10, 19, 28, 37}, and m in [1, 1]; iterations is set
// to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27698
// n takes the values 32 and 48 (multiples of nr = 16) with m fixed at 1;
// for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27715
// n in {32, 48} and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27733
// n in {32, 48} and k in {1, 10, 19, 28, 37} with m fixed at 1, using
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27751
// n in {32, 48}, k in {1, 10, 19, 28, 37}, and m in [1, 1]; iterations is
// set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27771
// k in {1, 10, 19, 28, 37}, n in [1, 16], m in [1, 1], all with cm_stride
// set to 19 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27792
// Single run with m = 1, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27806
// Single run with m = 1, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27820
// Single run with m = 1, n = 16, k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27834 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27835
27836
27837 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 16, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27850
// Single run with m = 2, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27864
// Single run with m = 2, n = 16, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
27878
// Sweeps n from 1 through 16 and m from 1 through 2 with k = 8 and
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27896
// Sweeps m from 1 through 2 with n = 16, k = 8 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
27912
// Sweeps n from 1 through 16 with m = 2, k = 8 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
27928
// Sweeps k from 1 through 7 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
27943
// Sweeps k from 1 through 7 with m = 2, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
27959
// Sweeps k from 1 through 7, n from 1 through 16 and m from 1 through 2,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27979
// Sweeps k from 9 through 15 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
27994
// Sweeps k from 9 through 15 with m = 2, n = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
28010
// Sweeps k from 9 through 15, n from 1 through 16 and m from 1 through 2,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28030
// Sweeps k from 16 through 80 in steps of 8 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
28045
// Sweeps k from 16 through 80 in steps of 8 with m = 2, n = 16 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
28061
// Sweeps k from 16 through 80 in steps of 8, n from 1 through 16 and
// m from 1 through 2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28081
// Sweeps n from 17 through 31 and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28098
// Sweeps n from 17 through 31 and k over {1, 10, 19, 28, 37} with m = 2
// and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28116
// Sweeps n from 17 through 31 and k over {1, 10, 19, 28, 37} with m = 2
// and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28134
// Sweeps n from 17 through 31, k over {1, 10, 19, 28, 37} and m from 1
// through 2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28154
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28171
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28189
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 2 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28207
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m from 1
// through 2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28227
// Sweeps k over {1, 10, 19, 28, 37}, n from 1 through 16 and m from 1
// through 2, with cm_stride set to 19 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28248
// Single run with m = 2, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
28262
// Single run with m = 2, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
28276
// Single run with m = 2, n = 16, k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
28290 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28291
28292
28293 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 1, n = 4, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28305
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28318
// Single run with m = 1, n = 4, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28331
// Sweeps n from 1 through 4 and m over {1} with k = 8 and iterations
// set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28348
// Sweeps m over {1} with n = 4, k = 8 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28363
// Sweeps n from 1 through 4 with m = 1, k = 8 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28378
// Sweeps k from 1 through 7 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28392
// Sweeps k from 1 through 7 with m = 1, n = 4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28407
// Sweeps k from 1 through 7, n from 1 through 4 and m over {1}, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28426
// Sweeps k from 9 through 15 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28440
// Sweeps k from 9 through 15 with m = 1, n = 4 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28455
// Sweeps k from 9 through 15, n from 1 through 4 and m over {1}, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28474
// Sweeps k from 16 through 80 in steps of 8 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28488
// Sweeps k from 16 through 80 in steps of 8 with m = 1, n = 4 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28503
// Sweeps k from 16 through 80 in steps of 8, n from 1 through 4 and
// m over {1}, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28522
// Sweeps n from 5 through 7 and k over {1, 10, 19, 28, 37} with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28538
// Sweeps n from 5 through 7 and k over {1, 10, 19, 28, 37} with m = 1
// and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28555
// Sweeps n from 5 through 7 and k over {1, 10, 19, 28, 37} with m = 1
// and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28572
// Sweeps n from 5 through 7, k over {1, 10, 19, 28, 37} and m over {1},
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28591
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28607
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28624
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28641
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over {1},
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28660
// Sweeps k over {1, 10, 19, 28, 37}, n from 1 through 4 and m over {1},
// with cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28680
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28693
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28706
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28719 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28720
28721
28722 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 2, n = 4, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28734
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28747
// Single run with m = 2, n = 4, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
28760
// Sweeps n from 1 through 4 and m from 1 through 2 with k = 8 and
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28777
// Sweeps m from 1 through 2 with n = 4, k = 8 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
28792
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // Varies only n over [1, 4]; m stays at 2 and k at 8, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28807
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // m=2, n=4 for every k in [1, 7].
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28821
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  // m=2, n=4 for every k in [1, 7], with a_stride explicitly set to 11.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28836
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // Every (k, n, m) with k in [1, 7], n in [1, 4], m in [1, 2]; one iteration each.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28855
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // m=2, n=4 for every k in [9, 15].
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28869
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  // m=2, n=4 for every k in [9, 15], with a_stride explicitly set to 19.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28884
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // Every (k, n, m) with k in [9, 15], n in [1, 4], m in [1, 2]; one iteration each.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28903
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // m=2, n=4 for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28917
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  // m=2, n=4 for k = 16, 24, ..., 80, with a_stride explicitly set to 83.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
28932
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4], m in [1, 2]; one iteration each.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28951
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // m=2 with n in [5, 7] and k = 1, 10, ..., 37.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28967
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // m=2 with n in [5, 7] and k = 1, 10, ..., 37; cn_stride explicitly set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28984
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  // m=2 with n in [5, 7] and k = 1, 10, ..., 37; a_stride explicitly set to 43.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29001
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // Every (n, k, m) with n in [5, 7], k = 1, 10, ..., 37, m in [1, 2]; one iteration each.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29020
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // m=2 with n = 8, 12 (multiples of 4) and k = 1, 10, ..., 37.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29036
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // m=2 with n = 8, 12 and k = 1, 10, ..., 37; cn_stride explicitly set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29053
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  // m=2 with n = 8, 12 and k = 1, 10, ..., 37; a_stride explicitly set to 43.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29070
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Every (n, k, m) with n = 8, 12, k = 1, 10, ..., 37, m in [1, 2]; one iteration each.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29089
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Every (k, n, m) with k = 1, 10, ..., 37, n in [1, 4], m in [1, 2];
  // cm_stride explicitly set to 7, one iteration each.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29109
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // m=2, n=4, k=8 with qmin explicitly set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29122
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // m=2, n=4, k=8 with qmax explicitly set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29135
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // m=2, n=4, k=8 with cm_stride explicitly set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29148 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29149
29150
29151 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single run with m == mr (3), n == nr (4) and k == 8.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29163
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Same m=3, n=4, k=8 problem, with cn_stride explicitly set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29176
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  // m=3, n=4, k=8 with a_stride explicitly set to 11 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29189
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // Every (m, n) pair with n in [1, 4] and m in [1, 3] at k == 8, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29206
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // Varies only m over [1, 3]; n stays at 4 and k at 8, one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29221
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // Varies only n over [1, 4]; m stays at 3 and k at 8, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29236
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // m=3, n=4 for every k in [1, 7].
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29250
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  // m=3, n=4 for every k in [1, 7], with a_stride explicitly set to 11.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29265
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // Every (k, n, m) with k in [1, 7], n in [1, 4], m in [1, 3]; one iteration each.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29284
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // m=3, n=4 for every k in [9, 15].
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29298
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  // m=3, n=4 for every k in [9, 15], with a_stride explicitly set to 19.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29313
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // Every (k, n, m) with k in [9, 15], n in [1, 4], m in [1, 3]; one iteration each.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29332
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // m=3, n=4 for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29346
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  // m=3, n=4 for k = 16, 24, ..., 80, with a_stride explicitly set to 83.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29361
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 4], m in [1, 3]; one iteration each.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29380
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // m=3 with n in [5, 7] and k = 1, 10, ..., 37.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29396
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // m=3 with n in [5, 7] and k = 1, 10, ..., 37; cn_stride explicitly set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29413
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  // m=3 with n in [5, 7] and k = 1, 10, ..., 37; a_stride explicitly set to 43.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29430
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // Every (n, k, m) with n in [5, 7], k = 1, 10, ..., 37, m in [1, 3]; one iteration each.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29449
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // m=3 with n = 8, 12 (multiples of 4) and k = 1, 10, ..., 37.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29465
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // m=3 with n = 8, 12 and k = 1, 10, ..., 37; cn_stride explicitly set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29482
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  // m=3 with n = 8, 12 and k = 1, 10, ..., 37; a_stride explicitly set to 43.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29499
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Every (n, k, m) with n = 8, 12, k = 1, 10, ..., 37, m in [1, 3]; one iteration each.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29518
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Every (k, n, m) with k = 1, 10, ..., 37, n in [1, 4], m in [1, 3];
  // cm_stride explicitly set to 7, one iteration each.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29538
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // m=3, n=4, k=8 with qmin explicitly set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29551
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // m=3, n=4, k=8 with qmax explicitly set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29564
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // m=3, n=4, k=8 with cm_stride explicitly set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29577 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29578
29579
29580 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run with m == mr (3), n == nr (4) and k == 8.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29592
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Same m=3, n=4, k=8 problem, with cn_stride explicitly set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29605
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  // m=3, n=4, k=8 with a_stride explicitly set to 11 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29618
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Every (m, n) pair with n in [1, 4] and m in [1, 3] at k == 8, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29635
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Varies only m over [1, 3]; n stays at 4 and k at 8, one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29650
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Varies only n over [1, 4]; m stays at 3 and k at 8, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29665
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // m=3, n=4 for every k in [1, 7].
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29679
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // m=3, n=4 for every k in [1, 7], with a_stride explicitly set to 11.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29694
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // Every (k, n, m) with k in [1, 7], n in [1, 4], m in [1, 3]; one iteration each.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29713
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // m=3, n=4 for every k in [9, 15].
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29727
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // m=3, n=4 for every k in [9, 15], with a_stride explicitly set to 19.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
29742
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // Every (k, n, m) with k in [9, 15], n in [1, 4], m in [1, 3]; one iteration each.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29761
// Tests m=3, n=4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
29775
// Tests m=3, n=4 for k = 16, 24, ..., 80 (steps of 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
29790
// Sweeps k = 16, 24, ..., 80 and, for each k, every (n, m) pair with
// n in 1..4 and m in 1..3; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29809
// Tests m=3 with n = 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29825
// Tests m=3 with n = 5..7 and k in {1, 10, 19, 28, 37}, with cn_stride set
// to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29842
// Tests m=3 with n = 5..7 and k in {1, 10, 19, 28, 37}, with a_stride set
// to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29859
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1..3 (nested in that
// order); each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29878
// Tests m=3 with n in {8, 12}; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29894
// Tests m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29911
// Tests m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride
// set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29928
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1..3 (nested in that
// order); each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29947
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1..3 (nested in that
// order) with cm_stride set to 7; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29967
// Tests m=3, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
29980
// Tests m=3, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
29993
// Tests m=3, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30006 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30007
30008
30009 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Tests the single configuration m=3, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30021
// Tests m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30034
// Tests m=3, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30047
// With k fixed at 8, tests every (n, m) pair with n in 1..4 and m in 1..3;
// each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(m_size).n(n_size).k(8)  // m/n/k under test
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30064
// With n=4 and k=8 fixed, tests m = 1..3; each configuration uses
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(m_size).n(4).k(8)      // m/n/k under test
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30079
// With m=3 and k=8 fixed, tests n = 1..4; each configuration uses
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(n_size).k(8)      // m/n/k under test
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30094
// Tests m=3, n=4 for every k from 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30108
// Tests m=3, n=4 for every k from 1 through 7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30123
// Sweeps k = 1..7 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..3; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30142
// Tests m=3, n=4 for every k from 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30156
// Tests m=3, n=4 for every k from 9 through 15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30171
// Sweeps k = 9..15 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..3; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30190
// Tests m=3, n=4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30204
// Tests m=3, n=4 for k = 16, 24, ..., 80 (steps of 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
        .m(3).n(4).k(k_size)      // m/n/k under test
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30219
// Sweeps k = 16, 24, ..., 80 and, for each k, every (n, m) pair with
// n in 1..4 and m in 1..3; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30238
// Tests m=3 with n = 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30254
// Tests m=3 with n = 5..7 and k in {1, 10, 19, 28, 37}, with cn_stride set
// to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30271
// Tests m=3 with n = 5..7 and k in {1, 10, 19, 28, 37}, with a_stride set
// to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30288
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1..3 (nested in that
// order); each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30307
// Tests m=3 with n in {8, 12}; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30323
// Tests m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30340
// Tests m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride
// set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)   // fixed mr/nr/kr/sr settings
          .m(3).n(n_size).k(k_size)  // m/n/k under test
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30357
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1..3 (nested in that
// order); each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30376
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1..3 (nested in that
// order) with cm_stride set to 7; each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30396
// Tests m=3, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30409
// Tests m=3, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30422
// Tests m=3, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // fixed mr/nr/kr/sr settings
      .m(3).n(4).k(8)           // m/n/k under test
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30435 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30436
30437
30438 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Tests the single configuration m=4, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(4).n(4).k(8)           // m/n/k under test
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30450
// Tests m=4, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(4).n(4).k(8)           // m/n/k under test
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30463
// Tests m=4, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
      .m(4).n(4).k(8)           // m/n/k under test
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
30476
// With k fixed at 8, tests every (n, m) pair with n in 1..4 and m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(m_size).n(n_size).k(8)  // m/n/k under test
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30493
// With n=4 and k=8 fixed, tests m = 1..4; each configuration uses
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(m_size).n(4).k(8)      // m/n/k under test
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30508
// With m=4 and k=8 fixed, tests n = 1..4; each configuration uses
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(n_size).k(8)      // m/n/k under test
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30523
// Tests m=4, n=4 for every k from 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30537
// Tests m=4, n=4 for every k from 1 through 7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30552
// Sweeps k = 1..7 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30571
// Tests m=4, n=4 for every k from 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30585
// Tests m=4, n=4 for every k from 9 through 15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30600
// Sweeps k = 9..15 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30619
// Tests m=4, n=4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30633
// Tests m=4, n=4 for k = 16, 24, ..., 80 (steps of 8) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)  // fixed mr/nr/kr/sr settings
        .m(4).n(4).k(k_size)      // m/n/k under test
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
30648
// Sweeps k = 16, 24, ..., 80 and, for each k, every (n, m) pair with
// n in 1..4 and m in 1..4; each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30667
// Tests m=4 with n = 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(4).n(n_size).k(k_size)  // m/n/k under test
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30683
// Tests m=4 with n = 5..7 and k in {1, 10, 19, 28, 37}, with cn_stride set
// to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(4).n(n_size).k(k_size)  // m/n/k under test
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30700
// Tests m=4 with n = 5..7 and k in {1, 10, 19, 28, 37}, with a_stride set
// to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)   // fixed mr/nr/kr/sr settings
          .m(4).n(n_size).k(k_size)  // m/n/k under test
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30717
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1..4 (nested in that
// order); each configuration is tested with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)        // fixed mr/nr/kr/sr settings
            .m(m_size).n(n_size).k(k_size)  // m/n/k under test
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30736
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30752
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30769
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30786
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30805
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; cm_stride = 7, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30825
// Single m = 4, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30838
// Single m = 4, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30851
// Single m = 4, n = 4, k = 8 case with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30864 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30865
30866
30867 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m = 4, n = 4, k = 8 case with default strides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30879
// Single m = 4, n = 4, k = 8 case with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30892
// Single m = 4, n = 4, k = 8 case with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30905
// k = 8 with n in 1..4 and m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30922
// k = 8, n = 4 with m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
30937
// k = 8, m = 4 with n in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
30952
// k in 1..7 with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
30966
// k in 1..7 with m = 4, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
30981
// k in 1..7, n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31000
// k in 9..15 with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31014
// k in 9..15 with m = 4, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31029
// k in 9..15, n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31048
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31062
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31077
// k in {16, 24, ..., 80} (step 8), n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31096
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31112
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31129
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31146
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31165
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31181
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31198
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31215
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31234
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; cm_stride = 7, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31254
// Single m = 4, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31267
// Single m = 4, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31280
// Single m = 4, n = 4, k = 8 case with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31293 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31294
31295
31296 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m = 4, n = 4, k = 8 case with default strides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31308
// Single m = 4, n = 4, k = 8 case with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31321
// Single m = 4, n = 4, k = 8 case with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31334
// k = 8 with n in 1..4 and m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31351
// k = 8, n = 4 with m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31366
// k = 8, m = 4 with n in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31381
// k in 1..7 with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31395
// k in 1..7 with m = 4, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31410
// k in 1..7, n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31429
// k in 9..15 with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31443
// k in 9..15 with m = 4, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31458
// k in 9..15, n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31477
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31491
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31506
// k in {16, 24, ..., 80} (step 8), n in 1..4, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31525
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31541
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31558
// n in 5..7, k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31575
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31594
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31610
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31627
// n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31644
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31663
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; cm_stride = 7, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31683
// Single m = 4, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31696
// Single m = 4, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31709
// m = 4, n = 4, k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
31722 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31723
31724
31725 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = 2, n = 2, k = 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
31737
// m = 2, n = 2, k = 1 with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
31750
// m = 2, n = 2, k = 1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
31763
// k = 1 for every m, n in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31780
// k = 1, n = 2, every m in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(cur_m).n(2).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
31795
// k = 1, m = 2, every n in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
31810
// m = 2, n = 2, k swept over [2, 10).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
31824
// m = 2, n = 2, k swept over [2, 10) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(cur_k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
31839
// k swept over [2, 10) for every m, n in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31858
// m = 2, n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31874
// m = 2, n = 3, k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31891
// m = 2, n = 3, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31908
// n = 3, k in {1, 3, 5}, every m in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31927
// m = 2, n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31943
// m = 2, n in {4, 6}, k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31960
// m = 2, n in {4, 6}, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_a) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
31977
// n in {4, 6}, k in {1, 3, 5}, every m in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
31996
// Every m, n in [1, 2] with cm_stride(5), k in {1, 3, 5}; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32016
// m = 2, n = 2, k = 1 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
32029
// m = 2, n = 2, k = 1 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
32042
// m = 2, n = 2, k = 1 with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
32055 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32056
32057
// m = 1, n = 2, k = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32069
// m = 1, n = 2, k = 1 with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32082
// m = 1, n = 2, k = 1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32095
// k = 1 for every n in [1, 2] and m in [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32112
// k = 1, n = 2, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1)
        .m(cur_m).n(2).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32127
// k = 1, m = 1, every n in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32142
// m = 1, n = 2, k swept over [2, 10).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32156
// m = 1, n = 2, k swept over [2, 10) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(cur_k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32171
// k swept over [2, 10) for every n in [1, 2] and m in [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32190
// m = 1, n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32206
// m = 1, n = 3, k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32223
// m = 1, n = 3, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32240
// n = 3, k in {1, 3, 5}, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32259
// m = 1, n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32275
// m = 1, n in {4, 6}, k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32292
// m = 1, n in {4, 6}, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32309
// n in {4, 6}, k in {1, 3, 5}, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32328
// Every n in [1, 2] and m in [1, 1] with cm_stride(5), k in {1, 3, 5}; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32348
// m = 1, n = 2, k = 1 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32361
// m = 1, n = 2, k = 1 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32374
// m = 1, n = 2, k = 1 with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32387
32388
// m = 1, n = 4, k = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32400
// m = 1, n = 4, k = 1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32413
// m = 1, n = 4, k = 1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32426
// k = 1 for every n in [1, 4] and m in [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32443
// k = 1, n = 4, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(cur_m).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32458
// k = 1, m = 1, every n in [1, 4]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32473
// m = 1, n = 4, k swept over [2, 10).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32487
// m = 1, n = 4, k swept over [2, 10) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(cur_k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
32502
// k swept over [2, 10) for every n in [1, 4] and m in [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32521
// m = 1, n in {5, 6, 7}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32537
// m = 1, n in {5, 6, 7}, k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32554
// m = 1, n in {5, 6, 7}, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32571
// n in {5, 6, 7}, k in {1, 3, 5}, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32590
// m = 1, n in {8, 12}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32606
// m = 1, n in {8, 12}, k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32623
// m = 1, n in {8, 12}, k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
32640
// n in {8, 12}, k in {1, 3, 5}, m over [1, 1]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32659
// Every n in [1, 4] and m in [1, 1] with cm_stride(7), k in {1, 3, 5}; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
32679
// m = 1, n = 4, k = 1 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32692
// m = 1, n = 4, k = 1 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32705
// m = 1, n = 4, k = 1 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
32718
32719
// m = 2, n = 2, k = 1.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
32731
// Single run at m=2, n=2, k=1 with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
32744
// Single run at m=2, n=2, k=1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
32757
// One iteration per (n, m): n in [1, 2], m in [1, 2], at k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32774
// One iteration per m in [1, 2], at n=2, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
32789
// One iteration per n in [1, 2], at m=2, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
32804
// Runs m=2, n=2 for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
32818
// Runs m=2, n=2 for every k in [2, 9] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
32833
// One iteration per (k, n, m): k in [2, 9], n in [1, 2], m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32852
// Runs m=2 for n = 3 and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32868
// Runs m=2 for n = 3 and k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32885
// Runs m=2 for n = 3 and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32902
// One iteration per (n, k, m): n = 3, k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32921
// Runs m=2 for n in {4, 6} and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32937
// Runs m=2 for n in {4, 6} and k in {1, 3, 5} with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32954
// Runs m=2 for n in {4, 6} and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32971
// One iteration per (n, k, m): n in {4, 6}, k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32990
// One iteration per (k, n, m): k in {1, 3, 5}, n in [1, 2], m in [1, 2], with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33010
// Single run at m=2, n=2, k=1 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33023
// Single run at m=2, n=2, k=1 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33036
// Single run at m=2, n=2, k=1 with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33049
33050
// Single run at m=2, n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33062
// Single run at m=2, n=4, k=1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33075
// Single run at m=2, n=4, k=1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33088
// One iteration per (n, m): n in [1, 4], m in [1, 2], at k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33105
// One iteration per m in [1, 2], at n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33120
// One iteration per n in [1, 4], at m=2, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33135
// Runs m=2, n=4 for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33149
// Runs m=2, n=4 for every k in [2, 9] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33164
// One iteration per (k, n, m): k in [2, 9], n in [1, 4], m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33183
// Runs m=2 for n in [5, 7] and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33199
// Runs m=2 for n in [5, 7] and k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33216
// Runs m=2 for n in [5, 7] and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33233
// One iteration per (n, k, m): n in [5, 7], k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33252
// Runs m=2 for n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33268
// Runs m=2 for n in {8, 12} and k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33285
// Runs m=2 for n in {8, 12} and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33302
// One iteration per (n, k, m): n in {8, 12}, k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33321
// One iteration per (k, n, m): k in {1, 3, 5}, n in [1, 4], m in [1, 2], with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33341
// Single run at m=2, n=4, k=1 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33354
// Single run at m=2, n=4, k=1 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33367
// Single run at m=2, n=4, k=1 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
33380
33381
// Single run at m=2, n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33393
// Single run at m=2, n=4, k=1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33406
// Single run at m=2, n=4, k=1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33419
// One iteration per (n, m): n in [1, 4], m in [1, 2], at k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33436
// One iteration per m in [1, 2], at n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33451
// One iteration per n in [1, 4], at m=2, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33466
// Runs m=2, n=4 for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33480
// Runs m=2, n=4 for every k in [2, 9] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
33495
// One iteration per (k, n, m): k in [2, 9], n in [1, 4], m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33514
// Runs m=2 for n in [5, 7] and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33530
// Runs m=2 for n in [5, 7] and k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33547
// Runs m=2 for n in [5, 7] and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33564
// One iteration per (n, k, m): n in [5, 7], k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33583
// Runs m=2 for n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33599
// Runs m=2 for n in {8, 12} and k in {1, 3, 5} with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33616
// Runs m=2 for n in {8, 12} and k in {1, 3, 5} with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33633
// One iteration per (n, k, m): n in {8, 12}, k in {1, 3, 5}, m in [1, 2].
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33652
// One iteration per (k, n, m): k in {1, 3, 5}, n in [1, 4], m in [1, 2], with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33672
// Single run at m=2, n=4, k=1 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33685
// Single run at m=2, n=4, k=1 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33698
// Single run at m=2, n=4, k=1 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
33711
33712
// Single run at m=2, n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
33724
// Single run at m=2, n=4, k=1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
33737
// Single run at m=2, n=4, k=1 with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
33750
// One iteration per (n, m): n in [1, 4], m in [1, 2], at k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33767
// 2x4 scalar_lrintf kernel: n = 4, k = 1, sweeping m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
33782
// 2x4 scalar_lrintf kernel: m = 2, k = 1, sweeping n over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
33797
// 2x4 scalar_lrintf kernel: m = 2, n = 4, sweeping k over 2..9.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
33811
// 2x4 scalar_lrintf kernel: m = 2, n = 4, sweeping k over 2..9 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
33826
// 2x4 scalar_lrintf kernel: sweeps k over 2..9, n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33845
// 2x4 scalar_lrintf kernel: m = 2, sweeping n over 5..7 and k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33861
// 2x4 scalar_lrintf kernel: m = 2, n over 5..7, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33878
// 2x4 scalar_lrintf kernel: m = 2, n over 5..7, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33895
// 2x4 scalar_lrintf kernel: n over 5..7, k over {1, 3, 5}, m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33914
// 2x4 scalar_lrintf kernel: m = 2, n over {8, 12}, k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33930
// 2x4 scalar_lrintf kernel: m = 2, n over {8, 12}, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33947
// 2x4 scalar_lrintf kernel: m = 2, n over {8, 12}, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33964
// 2x4 scalar_lrintf kernel: n over {8, 12}, k over {1, 3, 5}, m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33983
// 2x4 scalar_lrintf kernel: k over {1, 3, 5}, n over 1..4, m over 1..2, with cm_stride set to 7
// and one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34003
// 2x4 scalar_lrintf kernel: m = 2, n = 4, k = 1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34016
// 2x4 scalar_lrintf kernel: m = 2, n = 4, k = 1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34029
// 2x4 scalar_lrintf kernel: m = 2, n = 4, k = 1 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34042
34043
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1, all other tester settings left at defaults.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34055
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34068
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34081
// 3x4 scalar_imagic kernel: k = 1, sweeping n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34098
// 3x4 scalar_imagic kernel: n = 4, k = 1, sweeping m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34113
// 3x4 scalar_imagic kernel: m = 3, k = 1, sweeping n over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34128
// 3x4 scalar_imagic kernel: m = 3, n = 4, sweeping k over 2..9.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34142
// 3x4 scalar_imagic kernel: m = 3, n = 4, sweeping k over 2..9 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34157
// 3x4 scalar_imagic kernel: sweeps k over 2..9, n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34176
// 3x4 scalar_imagic kernel: m = 3, sweeping n over 5..7 and k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34192
// 3x4 scalar_imagic kernel: m = 3, n over 5..7, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34209
// 3x4 scalar_imagic kernel: m = 3, n over 5..7, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34226
// 3x4 scalar_imagic kernel: n over 5..7, k over {1, 3, 5}, m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34245
// 3x4 scalar_imagic kernel: m = 3, n over {8, 12}, k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34261
// 3x4 scalar_imagic kernel: m = 3, n over {8, 12}, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34278
// 3x4 scalar_imagic kernel: m = 3, n over {8, 12}, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34295
// 3x4 scalar_imagic kernel: n over {8, 12}, k over {1, 3, 5}, m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34314
// 3x4 scalar_imagic kernel: k over {1, 3, 5}, n over 1..4, m over 1..3, with cm_stride set to 7
// and one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34334
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34347
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34360
// 3x4 scalar_imagic kernel: m = 3, n = 4, k = 1 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
}
34373
34374
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1, all other tester settings left at defaults.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34386
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34399
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34412
// 3x4 scalar_lrintf kernel: k = 1, sweeping n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34429
// 3x4 scalar_lrintf kernel: n = 4, k = 1, sweeping m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
34444
// 3x4 scalar_lrintf kernel: m = 3, k = 1, sweeping n over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
34459
// 3x4 scalar_lrintf kernel: m = 3, n = 4, sweeping k over 2..9.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
34473
// 3x4 scalar_lrintf kernel: m = 3, n = 4, sweeping k over 2..9 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
34488
// 3x4 scalar_lrintf kernel: sweeps k over 2..9, n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34507
// 3x4 scalar_lrintf kernel: m = 3, sweeping n over 5..7 and k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34523
// 3x4 scalar_lrintf kernel: m = 3, n over 5..7, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34540
// 3x4 scalar_lrintf kernel: m = 3, n over 5..7, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34557
// 3x4 scalar_lrintf kernel: n over 5..7, k over {1, 3, 5}, m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34576
// 3x4 scalar_lrintf kernel: m = 3, n over {8, 12}, k over {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34592
// 3x4 scalar_lrintf kernel: m = 3, n over {8, 12}, k over {1, 3, 5}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34609
// 3x4 scalar_lrintf kernel: m = 3, n over {8, 12}, k over {1, 3, 5}, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34626
// 3x4 scalar_lrintf kernel: n over {8, 12}, k over {1, 3, 5}, m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34645
// 3x4 scalar_lrintf kernel: k over {1, 3, 5}, n over 1..4, m over 1..3, with cm_stride set to 7
// and one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34665
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34678
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34691
// 3x4 scalar_lrintf kernel: m = 3, n = 4, k = 1 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
34704
34705
// 4x2 scalar_fmagic kernel: m = 4, n = 2, k = 1, all other tester settings left at defaults.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
34717
// 4x2 scalar_fmagic kernel: m = 4, n = 2, k = 1 with cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
34730
// 4x2 scalar_fmagic kernel: m = 4, n = 2, k = 1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .a_stride(3)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
34743
// 4x2 scalar_fmagic kernel: k = 1, sweeping n over 1..2 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34760
// 4x2 scalar_fmagic kernel: n = 2, k = 1, sweeping m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34775
// 4x2 scalar_fmagic kernel: m = 4, k = 1, sweeping n over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
34790
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  // Sweeps k over [2, 9] with m = 4 and n = 2.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
34804
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  // Sweeps k over [2, 9] with m = 4, n = 2 and a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
34819
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over [2, 9], n over [1, 2] and m over [1, 4], one iteration per shape.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
34838
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  // n starts at 3 and stops before 4 (a single value); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34854
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // n starts at 3 and stops before 4; k takes 1, 3, 5; m = 4; cn_stride set to 5.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34871
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  // n starts at 3 and stops before 4; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34888
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // n starts at 3 and stops before 4; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
34907
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  // n takes 4 and 6 (multiples of nr = 2); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34923
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n takes 4 and 6; k takes 1, 3, 5; m = 4; cn_stride set to 5.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34940
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  // n takes 4 and 6; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
34957
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // n takes 4 and 6; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
34976
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 2]; m sweeps [1, 4]; cm_stride set to 5; one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
34996
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  // Single run with m = 4, n = 2, k = 1 and qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35009
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  // Single run with m = 4, n = 2, k = 1 and qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35022
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  // Single run with m = 4, n = 2, k = 1 and cm_stride set to 5.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35035
35036
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  // Single run with m == mr (4), n == nr (4) and k = 1.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35048
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  // Single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35061
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  // Single run with m = 4, n = 4, k = 1 and a_stride set to 3.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35074
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Sweeps n over [1, 4] and m over [1, 4] at k = 1, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35091
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over [1, 4] with n = 4 and k = 1, one iteration per shape.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35106
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over [1, 4] with m = 4 and k = 1, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35121
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  // Sweeps k over [2, 9] with m = 4 and n = 4.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35135
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  // Sweeps k over [2, 9] with m = 4, n = 4 and a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
35150
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over [2, 9], n over [1, 4] and m over [1, 4], one iteration per shape.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35169
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35185
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35202
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35219
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35238
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35254
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  // n takes 8 and 12; k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35271
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  // n takes 8 and 12; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35288
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  // n takes 8 and 12; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35307
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 4]; m sweeps [1, 4]; cm_stride set to 7; one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35327
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  // Single run with m = 4, n = 4, k = 1 and qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35340
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  // Single run with m = 4, n = 4, k = 1 and qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35353
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  // Single run with m = 4, n = 4, k = 1 and cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
35366
35367
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  // Single run with m == mr (4), n == nr (4) and k = 1.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35379
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  // Single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35392
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  // Single run with m = 4, n = 4, k = 1 and a_stride set to 3.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35405
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  // Sweeps n over [1, 4] and m over [1, 4] at k = 1, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35422
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // Sweeps m over [1, 4] with n = 4 and k = 1, one iteration per shape.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35437
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // Sweeps n over [1, 4] with m = 4 and k = 1, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35452
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  // Sweeps k over [2, 9] with m = 4 and n = 4.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35466
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  // Sweeps k over [2, 9] with m = 4, n = 4 and a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
35481
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  // Sweeps k over [2, 9], n over [1, 4] and m over [1, 4], one iteration per shape.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35500
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35516
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35533
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35550
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  // n sweeps [5, 7]; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35569
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 3, 5; m = 4.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35585
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  // n takes 8 and 12; k takes 1, 3, 5; m = 4; cn_stride set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35602
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
  // n takes 8 and 12; k takes 1, 3, 5; m = 4; a_stride set to 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35619
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  // n takes 8 and 12; k takes 1, 3, 5; m sweeps [1, 4]; one iteration per shape.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35638
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps [1, 4]; m sweeps [1, 4]; cm_stride set to 7; one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
                  xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
35658
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  // Single run with m = 4, n = 4, k = 1 and qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35671
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  // Single run with m = 4, n = 4, k = 1 and qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35684
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  // Single run with m = 4, n = 4, k = 1 and cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
35697
35698
35699 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with m == mr (4), n == nr (8) and k = 8, passing the xnn_generate_* function to Test.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
35712
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cn) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
35726
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_strided_a) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with m = 4, n = 8, k = 8 and a_stride set to 11.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
35740
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n over [1, 8] and m over [1, 4] at k = 8, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
35758
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps m over [1, 4] with n = 8 and k = 8, one iteration per shape.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
35774
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n over [1, 8] with m = 4 and k = 8, one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
35790
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8) {
  // Gated by TEST_REQUIRES_ARM_NEON_V8 (presumably bails out when the ISA is unavailable).
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k over [1, 7] with m = 4 and n = 8.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
35805
// Same k sweep as k_lt_8 (k in [1, 7]) but with a_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val)
      .a_stride(11)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
35821
// For every k in [1, 7], sweeps all (m, n) pairs with m in [1, 4] and
// n in [1, 8]; each combination is run for a single iteration.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35841
// Runs the full 4x8 tile for every k in [9, 15].
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
35856
// Same k sweep as k_gt_8 (k in [9, 15]) but with a_stride set to 19.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val)
      .a_stride(19)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
35872
// For every k in [9, 15], sweeps all (m, n) pairs with m in [1, 4] and
// n in [1, 8]; each combination is run for a single iteration.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35892
// Runs the full 4x8 tile for k = 16, 24, ..., 80 (step of 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
35907
// Same k sweep as k_div_8 (k = 16, 24, ..., 80) but with a_stride set to 83.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val)
      .a_stride(83)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
35923
// For k = 16, 24, ..., 80, sweeps all (m, n) pairs with m in [1, 4] and
// n in [1, 8]; each combination is run for a single iteration.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35943
// For every n in [9, 15], runs m = 4 with k = 1, 10, 19, 28, 37 (step of 9).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35960
// Same (n, k) sweep as n_gt_8 (n in [9, 15], k from 1 to 40 in steps of 9)
// but with cn_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35978
// Same (n, k) sweep as n_gt_8 (n in [9, 15], k from 1 to 40 in steps of 9)
// but with a_stride set to 43.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35996
// For every n in [9, 15] and k from 1 to 40 in steps of 9, sweeps m over
// [1, 4]; each combination is run for a single iteration.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36016
// For n = 16 and n = 24, runs m = 4 with k from 1 to 40 in steps of 9.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36033
// Same (n, k) sweep as n_div_8 (n = 16, 24; k from 1 to 40 in steps of 9)
// but with cn_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36051
// Same (n, k) sweep as n_div_8 (n = 16, 24; k from 1 to 40 in steps of 9)
// but with a_stride set to 43.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36069
// For n = 16, 24 and k from 1 to 40 in steps of 9, sweeps m over [1, 4];
// each combination is run for a single iteration.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36089
// With cm_stride set to 11, sweeps k from 1 to 40 in steps of 9 and all (m, n)
// pairs with m in [1, 4] and n in [1, 8]; one iteration per combination.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36110
// Single run of the full 4x8 tile at k = 8 with qmin set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
36124
// Single run of the full 4x8 tile at k = 8 with qmax set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
36138
// Single run of the full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
36152 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
36153