1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-gemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
40
// Single run with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
54
// Single run with m = 1, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
68
// Sweeps n over [1, 8] and m over [1, 1] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
86
// Sweeps m over [1, 1] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
102
// Sweeps n over [1, 8] with m = 1 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
118
// Sweeps k over [1, 7] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
133
// Sweeps k over [1, 7] with m = 1, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
149
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
169
// Sweeps k over [9, 15] with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
184
// Sweeps k over [9, 15] with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
200
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
220
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
235
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
251
// Sweeps k over 16..80 (step 8), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
271
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
288
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
306
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
324
// Sweeps n over [9, 15], k over 1..40 (step 9) and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
344
// Sweeps n over 16 and 24 (step 8) and k over 1..40 (step 9) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
361
// Sweeps n over 16 and 24 and k over 1..40 (step 9) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
379
// Sweeps n over 16 and 24 and k over 1..40 (step 9) with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
397
// Sweeps n over 16 and 24, k over 1..40 (step 9) and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
417
// Sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 1] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
438
// Single run with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
452
// Single run with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
466
// Single run with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
480 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
481
482
483 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = 4, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
496
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
510
// Single run with m = 4, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
524
// Sweeps n over [1, 8] and m over [1, 4] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
542
// Sweeps m over [1, 4] with n = 8 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
558
// Sweeps n over [1, 8] with m = 4 and k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
574
// Sweeps k over [1, 7] with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
589
// Sweeps k over [1, 7] with m = 4, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
605
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
625
// Sweeps k over [9, 15] with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
640
// Sweeps k over [9, 15] with m = 4, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
656
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
676
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
691
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
707
// Sweeps k over 16..80 (step 8), n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
727
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
744
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
762
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m = 4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
780
// Sweeps n over [9, 15], k over 1..40 (step 9) and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
800
// Sweeps n over 16 and 24 (step 8) and k over 1..40 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
817
// Sweeps n over 16 and 24 and k over 1..40 (step 9) with m = 4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
835
// Sweeps n over 16 and 24 and k over 1..40 (step 9) with m = 4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
853
// Sweeps n over 16 and 24, k over 1..40 (step 9) and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
873
// Sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 4] with cm_stride set to 11,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
894
// Single run with m = 4, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
908
// Single run with m = 4, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
922
// Single run with m = 4, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
936 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
937
938
939 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = 4, n = 8, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
952
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
966
// Single run with m = 4, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
980
// Sweeps n over [1, 8] and m over [1, 4] at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
998
// Sweeps m over [1, 4] with n=8 and k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1014
// Sweeps n over [1, 8] with m=4 and k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1030
// Sweeps k over [1, 7] with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1045
// Sweeps k over [1, 7] with m=4, n=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1061
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1081
// Sweeps k over [9, 15] with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1096
// Sweeps k over [9, 15] with m=4, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1112
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1132
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1147
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4, n=8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1163
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1183
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 (step 9) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1200
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1218
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1236
// Sweeps n over [9, 15], k over 1, 10, ..., 37 and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1256
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1273
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1291
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1309
// Runs n = 16 and 24, k over 1, 10, ..., 37 and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1329
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 4] with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1350
// Single run with m=4, n=8, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
1364
// Single run with m=4, n=8, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
1378
// Single run with m=4, n=8, k=8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
1392 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1393
1394
1395 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run of the tester with m=4, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1408
// Single run with m=4, n=8, k=8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1422
// Single run with m=4, n=8, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1436
// Sweeps n over [1, 8] and m over [1, 4] at k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1454
// Sweeps m over [1, 4] with n=8 and k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1470
// Sweeps n over [1, 8] with m=4 and k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1486
// Sweeps k over [1, 7] with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1501
// Sweeps k over [1, 7] with m=4, n=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1517
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1537
// Sweeps k over [9, 15] with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1552
// Sweeps k over [9, 15] with m=4, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1568
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1588
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1603
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4, n=8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1619
// Sweeps k over 16, 24, ..., 80, n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1639
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 (step 9) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1656
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1674
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1692
// Sweeps n over [9, 15], k over 1, 10, ..., 37 and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1712
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1729
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1747
// Runs n = 16 and 24 against k over 1, 10, ..., 37 with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1765
// Runs n = 16 and 24, k over 1, 10, ..., 37 and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1785
// Sweeps k over 1, 10, ..., 37, n over [1, 8] and m over [1, 4] with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1806
// Single run with m=4, n=8, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1820
// Single run with m=4, n=8, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1834
// Single run with m=4, n=8, k=8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1848 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1849
1850
1851 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Single run of the tester with kr=4, m=4, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1864
// Single run with kr=4, m=4, n=8, k=8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1878
// Single run with kr=4, m=4, n=8, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
1892
// Sweeps n over [1, 8] and m over [1, 4] at k=8 (kr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1910
// Sweeps m over [1, 4] with n=8 and k=8 (kr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
1926
// Sweeps n over [1, 8] with m = 4 and k = 8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1942
// Sweeps k over [1, 7] with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1957
// Sweeps k over [1, 7] with m = 4, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1973
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1993
// Sweeps k over [9, 15] with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2008
// Sweeps k over [9, 15] with m = 4, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2024
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2044
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2059
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 4, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2075
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 8] and m over [1, 4],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2095
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2112
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 (step 9) with m = 4 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2130
// Sweeps n over [9, 15] and k over 1, 10, ..., 37 (step 9) with m = 4 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2148
// Sweeps n over [9, 15], k over 1, 10, ..., 37 (step 9) and m over [1, 4],
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2168
// Sweeps n over 16 and 24 (step 8) and k over 1, 10, ..., 37 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2185
// Sweeps n over 16 and 24 (step 8) and k over 1, 10, ..., 37 (step 9) with
// m = 4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2203
// Sweeps n over 16 and 24 (step 8) and k over 1, 10, ..., 37 (step 9) with
// m = 4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2221
// Sweeps n over 16 and 24 (step 8), k over 1, 10, ..., 37 (step 9) and m over
// [1, 4], with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2241
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 8] and m over [1, 4] with
// cm_stride set to 11 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2262
// Single configuration: m = 4, n = 8, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2276
// Single configuration: m = 4, n = 8, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2290
// Single configuration: m = 4, n = 8, k = 8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2304 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
2305
2306
2307 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 1, n = 16, k = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2320
// Single configuration: m = 1, n = 16, k = 4 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2334
// Single configuration: m = 1, n = 16, k = 4 with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.a_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2348
// Sweeps n over [1, 16] at k = 4 with iterations set to 1; the m loop only
// takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(mc).n(nc).k(4);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2366
// n = 16, k = 4 with iterations set to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(mc).n(16).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2382
// Sweeps n over [1, 16] with m = 1 and k = 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(nc).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2398
// Sweeps k over [1, 3] with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2413
// Sweeps k over [1, 3] with m = 1, n = 16 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.a_stride(7);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2429
// Sweeps k over [1, 3] and n over [1, 16] with iterations set to 1; the m
// loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2449
// Sweeps k over [5, 7] with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2464
// Sweeps k over [5, 7] with m = 1, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2480
// Sweeps k over [5, 7] and n over [1, 16] with iterations set to 1; the m
// loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2500
// Sweeps k over 8, 12, ..., 40 (step 4) with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2515
// Sweeps k over 8, 12, ..., 40 (step 4) with m = 1, n = 16 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.a_stride(43);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2531
// Sweeps k over 8, 12, ..., 40 (step 4) and n over [1, 16] with iterations
// set to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2551
// Sweeps n over [17, 31] and k over 1, 6, 11, 16 (step 5) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2568
// Sweeps n over [17, 31] and k over 1, 6, 11, 16 (step 5) with m = 1 and
// cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2586
// Sweeps n over [17, 31] and k over 1, 6, 11, 16 (step 5) with m = 1 and
// a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.a_stride(23);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2604
// Sweeps n over [17, 31] and k over 1, 6, 11, 16 (step 5) with iterations set
// to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2624
// Sweeps n over 32 and 48 (step 16) and k over 1, 6, 11, 16 (step 5) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2641
// Sweeps n over 32 and 48 (step 16) and k over 1, 6, 11, 16 (step 5) with
// m = 1 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2659
// Sweeps n over 32 and 48 (step 16) and k over 1, 6, 11, 16 (step 5) with
// m = 1 and a_stride set to 23.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.a_stride(23);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2677
// Sweeps n over 32 and 48 (step 16) and k over 1, 6, 11, 16 (step 5) with
// iterations set to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2697
// Sweeps k over 1, 6, 11, 16 (step 5) and n over [1, 16] with cm_stride set to
// 19 and iterations set to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2718
// Single configuration: m = 1, n = 16, k = 4 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2732
// Single configuration: m = 1, n = 16, k = 4 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2746
// Single configuration: m = 1, n = 16, k = 4 with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(4);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2760 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2761
2762
2763 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 1, n = 16, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2776
// Single configuration: m = 1, n = 16, k = 8 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2790
// Single configuration: m = 1, n = 16, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2804
// Sweeps n over [1, 16] at k = 8 with iterations set to 1; the m loop only
// takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2822
// n = 16, k = 8 with iterations set to 1; the m loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(mc).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2838
// Sweeps n over [1, 16] with m = 1 and k = 8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2854
// Sweeps k over [1, 7] with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2869
// Sweeps k over [1, 7] with m = 1, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2885
// Sweeps k over [1, 7] and n over [1, 16] with iterations set to 1; the m
// loop only takes the value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2905
// Sweeps k over [9, 15] with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2920
// Sweeps k over [9, 16) with m = 1, n = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2936
// Sweeps k over [9, 16), n over [1, 16] and m over [1, 1], calling
// iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2956
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2971
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1, n = 16 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(16).kr(4).sr(1);
    tester.m(1).n(16).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2987
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 16] and m over [1, 1],
// calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3007
// Sweeps n over [17, 32) and k over 1, 10, ..., 37 (step 9) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3024
// Sweeps n over [17, 32) and k over 1, 10, ..., 37 (step 9) with m = 1 and
// cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3042
// Sweeps n over [17, 32) and k over 1, 10, ..., 37 (step 9) with m = 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3060
// Sweeps n over [17, 32), k over 1, 10, ..., 37 (step 9) and m over [1, 1],
// calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3080
// Sweeps n over 32 and 48 (step 16) and k over 1, 10, ..., 37 (step 9)
// with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3097
// Sweeps n over 32 and 48 (step 16) and k over 1, 10, ..., 37 (step 9)
// with m = 1 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3115
// Sweeps n over 32 and 48 (step 16) and k over 1, 10, ..., 37 (step 9)
// with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(16).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3133
// Sweeps n over 32 and 48 (step 16), k over 1, 10, ..., 37 (step 9) and
// m over [1, 1], calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3153
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 16] and m over [1, 1]
// with cm_stride set to 19, calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3174
// Single run with m = 1, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3188
// Single run with m = 1, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3202
// Single run with m = 1, n = 16, k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(4).sr(1);
  tester.m(1).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3216 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3217
3218
3219 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 2, n = 8 (equal to mr and nr) and k = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3232
// Single run with m = 2, n = 8, k = 16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3246
// Single run with m = 2, n = 8, k = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3260
// Sweeps n over [1, 8] and m over [1, 2] with k = 16, calling iterations(1)
// on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3278
// Sweeps m over [1, 2] with n = 8 and k = 16, calling iterations(1) on each
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3294
// Sweeps n over [1, 8] with m = 2 and k = 16, calling iterations(1) on each
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3310
// Sweeps k over [1, 16) with m = 2 and n = 8 (equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3325
// Sweeps k over [1, 16) with m = 2, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3341
// Sweeps k over [1, 16), n over [1, 8] and m over [1, 2], calling
// iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3361
// Sweeps k over [17, 32) with m = 2 and n = 8 (equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3376
// Sweeps k over [17, 32) with m = 2, n = 8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3392
// Sweeps k over [17, 32), n over [1, 8] and m over [1, 2], calling
// iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3412
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 2 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3427
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 2, n = 8 and a_stride
// set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3443
// Sweeps k over 32, 48, ..., 160 (step 16), n over [1, 8] and m over [1, 2],
// calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3463
// Sweeps n over [9, 16) and k over 1, 18, ..., 69 (step 17) with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3480
// Sweeps n over [9, 16) and k over 1, 18, ..., 69 (step 17) with m = 2 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3498
// Sweeps n over [9, 16) and k over 1, 18, ..., 69 (step 17) with m = 2 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3516
// Sweeps n over [9, 16), k over 1, 18, ..., 69 (step 17) and m over [1, 2],
// calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3536
// Sweeps n over 16 and 24 (step 8) and k over 1, 18, ..., 69 (step 17)
// with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3553
// Sweeps n over 16 and 24 (step 8) and k over 1, 18, ..., 69 (step 17)
// with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3571
// Sweeps n over 16 and 24 (step 8) and k over 1, 18, ..., 69 (step 17)
// with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3589
// Sweeps n over 16 and 24 (step 8), k over 1, 18, ..., 69 (step 17) and
// m over [1, 2], calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3609
// Sweeps k over 1, 18, ..., 69 (step 17), n over [1, 8] and m over [1, 2]
// with cm_stride set to 11, calling iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3630
// Single run with m = 2, n = 8, k = 16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3644
// Single run with m = 2, n = 8, k = 16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3658
// Single run with m = 2, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3672 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3673
3674
3675 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 2, n = 8 (equal to mr and nr) and k = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3688
// Single run with m = 2, n = 8, k = 16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3702
// Single run with m = 2, n = 8, k = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3716
// Sweeps n over [1, 8] and m over [1, 2] with k = 16, calling iterations(1)
// on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3734
// Sweeps m over [1, 2] with n = 8 and k = 16, calling iterations(1) on each
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3750
// Sweeps n over [1, 8] with m = 2 and k = 16, calling iterations(1) on each
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3766
// Sweeps k over [1, 16) with m = 2 and n = 8 (equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3781
// Sweeps k over [1, 16) with m = 2, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3797
// Sweeps k over [1, 16), n over [1, 8] and m over [1, 2], calling
// iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3817
// Sweeps k over [17, 32) with m = 2 and n = 8 (equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3832
// Sweeps k over [17, 32) with m = 2, n = 8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3848
// Sweeps k over [17, 32), n over [1, 8] and m over [1, 2], calling
// iterations(1) on each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3868
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 2 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3883
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 2, n = 8 and a_stride
// set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3899
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 16 from 32 through 160, crossed with every
  // n in 1..8 and m in 1..2; one iteration per configuration.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3919
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 with m = 2; k starts at 1 and advances by 17 while k <= 80.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3936
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 with m = 2 and cn_stride = 11; k starts at 1 and advances by
  // 17 while k <= 80.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3954
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 with m = 2 and a_stride = 83; k starts at 1 and advances by
  // 17 while k <= 80.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3972
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15, k from 1 in steps of 17 while k <= 80, and m in 1..2; one
  // iteration per configuration.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3992
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 2; k starts at 1 and advances by 17 while
  // k <= 80.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4009
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 2 and cn_stride = 11; k starts at 1 and
  // advances by 17 while k <= 80.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4027
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 2 and a_stride = 83; k starts at 1 and advances
  // by 17 while k <= 80.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4045
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, k from 1 in steps of 17 while k <= 80, and m in 1..2;
  // one iteration per configuration.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4065
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 in steps of 17 while k <= 80, crossed with every n in 1..8 and
  // m in 1..2, all with cm_stride = 11; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4086
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 2, n = 8, k = 16 with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4100
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 2, n = 8, k = 16 with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4114
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 2, n = 8, k = 16 with cm_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4128 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4129
4130
4131 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4144
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with cn_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4158
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with a_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4172
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8 with every n in 1..16 and m in 1..4; one iteration per
  // configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4190
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, n = 16, with m varied over 1..4; one iteration per configuration.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4206
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, m = 4, with n varied over 1..16; one iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4222
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k varied over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4237
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 11; k varied over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4253
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..7 crossed with every n in 1..16 and m in 1..4; one iteration per
  // configuration.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4273
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k varied over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4288
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 19; k varied over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4304
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 9..15 crossed with every n in 1..16 and m in 1..4; one iteration
  // per configuration.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4324
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k takes the multiples of 8 from 16 through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4339
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 83; k takes the multiples of 8 from 16
  // through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4355
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 8 from 16 through 80, crossed with every n in
  // 1..16 and m in 1..4; one iteration per configuration.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4375
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31 with m = 4; k starts at 1 and advances by 9 while k <= 40.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4392
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31 with m = 4 and cn_stride = 19; k starts at 1 and advances
  // by 9 while k <= 40.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4410
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31 with m = 4 and a_stride = 43; k starts at 1 and advances by
  // 9 while k <= 40.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4428
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k from 1 in steps of 9 while k <= 40, and m in 1..4; one
  // iteration per configuration.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4448
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32 and 48 with m = 4; k starts at 1 and advances by 9 while
  // k <= 40.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4465
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32 and 48 with m = 4 and cn_stride = 19; k starts at 1 and
  // advances by 9 while k <= 40.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4483
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32 and 48 with m = 4 and a_stride = 43; k starts at 1 and advances
  // by 9 while k <= 40.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4501
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32 and 48, k from 1 in steps of 9 while k <= 40, and m in 1..4;
  // one iteration per configuration.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4521
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 in steps of 9 while k <= 40, crossed with every n in 1..16 and
  // m in 1..4, all with cm_stride = 19; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4542
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4556
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4570
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with cm_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4584 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4585
4586
4587 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4600
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with cn_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4614
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m = 4, n = 16, k = 8 with a_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4628
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8 with every n in 1..16 and m in 1..4; one iteration per
  // configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4646
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, n = 16, with m varied over 1..4; one iteration per configuration.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4662
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, m = 4, with n varied over 1..16; one iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4678
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k varied over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4693
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 11; k varied over 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4709
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..7 crossed with every n in 1..16 and m in 1..4; one iteration per
  // configuration.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4729
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k varied over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4744
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 19; k varied over 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4760
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 9..15 crossed with every n in 1..16 and m in 1..4; one iteration
  // per configuration.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4780
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16; k takes the multiples of 8 from 16 through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4795
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed m = 4, n = 16, a_stride = 83; k takes the multiples of 8 from 16
  // through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4811
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 8 from 16 through 80, crossed with every n in
  // 1..16 and m in 1..4; one iteration per configuration.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4831
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31 with m = 4; k starts at 1 and advances by 9 while k <= 40.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4848
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31 with m = 4 and cn_stride = 19; k starts at 1 and advances
  // by 9 while k <= 40.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4866
// Same n (17..31) and k (1, 10, ..., 37) sweep as n_gt_16, with a_stride(43) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4884
// n in 17..31, k in 1, 10, ..., 37, and m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4904
// n takes the multiples of 16 in 32..48 (32 and 48); k takes 1, 10, ..., 37; m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4921
// n in {32, 48}, k in 1, 10, ..., 37, m=4, with cn_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4939
// n in {32, 48}, k in 1, 10, ..., 37, m=4, with a_stride(43) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4957
// n in {32, 48}, k in 1, 10, ..., 37, and m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4977
// k in 1, 10, ..., 37, n in 1..16, m in 1..4, each run with cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4998
// Single m=4, n=16, k=8 run with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5012
// Single m=4, n=16, k=8 run with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5026
// Single m=4, n=16, k=8 run with cm_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5040 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5041
5042
5043 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m=4, n=16, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5056
// Single m=4, n=16, k=8 run with cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5070
// Single m=4, n=16, k=8 run with a_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5084
// k fixed at 8; runs every (n, m) with n in 1..16 and m in 1..4, iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mm).n(nn).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5102
// k=8 and n=16 fixed; m varies over 1..4 with iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(mm).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5118
// k=8 and m=4 fixed; n varies over 1..16 with iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nn).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5134
// k varies over 1..7 (every value below 8) with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5149
// k varies over 1..7 with m=4, n=16 and a_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5165
// k in 1..7, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5185
// k varies over 9..15 (strictly between 8 and 16) with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5200
// k varies over 9..15 with m=4, n=16 and a_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5216
// k in 9..15, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5236
// k takes the multiples of 8 from 16 through 80 with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5251
// k takes the multiples of 8 from 16 through 80 with m=4, n=16 and a_stride(83) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5267
// k in 16, 24, ..., 80, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// n ranges over 17..31 (all above nr=16); for each n, k takes 1, 10, 19, 28, 37 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5304
// n in 17..31, k in 1, 10, ..., 37, m=4, with cn_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5322
// n in 17..31, k in 1, 10, ..., 37, m=4, with a_stride(43) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5340
// n in 17..31, k in 1, 10, ..., 37, and m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5360
// n takes the multiples of 16 in 32..48 (32 and 48); k takes 1, 10, ..., 37; m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5377
// n in {32, 48}, k in 1, 10, ..., 37, m=4, with cn_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5395
// n in {32, 48}, k in 1, 10, ..., 37, m=4, with a_stride(43) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5413
// n in {32, 48}, k in 1, 10, ..., 37, and m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5433
// k in 1, 10, ..., 37, n in 1..16, m in 1..4, each run with cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5454
// Single m=4, n=16, k=8 run with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5468
// Single m=4, n=16, k=8 run with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5482
// Single m=4, n=16, k=8 run with cm_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5496 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5497
5498
5499 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m=4, n=16, k=16 (kr=4) with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5512
// Single m=4, n=16, k=16 run with cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5526
// Single m=4, n=16, k=16 run with a_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5540
// k fixed at 16; runs every (n, m) with n in 1..16 and m in 1..4, iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mm).n(nn).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5558
// k=16 and n=16 fixed; m varies over 1..4 with iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(mm).n(16).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5574
// k=16 and m=4 fixed; n varies over 1..16 with iterations(1) each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nn).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5590
// k varies over 1..15 (every value below 16) with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 1; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5605
// k varies over 1..15 with m=4, n=16 and a_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 1; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5621
// k in 1..15, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 1; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5641
// k varies over 17..31 (strictly between 16 and 32) with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 17; kk < 32; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5656
// k varies over 17..31 with m=4, n=16 and a_stride(37) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 17; kk < 32; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5672
// k in 17..31, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 17; kk < 32; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5692
// k takes the multiples of 16 from 32 through 160 with m=4, n=16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5707
// k takes the multiples of 16 from 32 through 160 with m=4, n=16 and a_stride(163) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kk)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5723
// k in 32, 48, ..., 160, n in 1..16, m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kk = 32; kk <= 160; kk += 16) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5743
// n ranges over 17..31 (all above nr=16); for each n, k takes 1, 18, 35, 52, 69 with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5760
// n in 17..31, k in 1, 18, ..., 69, m=4, with cn_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5778
// n in 17..31, k in 1, 18, ..., 69, m=4, with a_stride(83) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5796
// n in 17..31, k in 1, 18, ..., 69, and m in 1..4; iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 80; kk += 17) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5816
// n in {32, 48} crossed with k in {1, 18, 35, 52, 69}; m fixed at 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5833
// n in {32, 48} crossed with k in {1, 18, 35, 52, 69}; m = 4, cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5851
// n in {32, 48} crossed with k in {1, 18, 35, 52, 69}; m = 4, a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5869
// n in {32, 48}, k in {1, 18, 35, 52, 69}, m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5889
// k in {1, 18, 35, 52, 69}, n in [1, 16], m in [1, 4]; cm_stride = 19, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size < 17; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5910
// m = 4, n = 16, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5924
// m = 4, n = 16, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5938
// m = 4, n = 16, k = 16 with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5952 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5953
5954
5955 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// m == mr (4), n == nr (16), k = 8; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5968
// m = 4, n = 16, k = 8 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5982
// m = 4, n = 16, k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5996
// k = 8 with n in [1, 16] and m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size < 17; ++n_size) {
    for (uint32_t m_size = 1; m_size < 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6014
// k = 8, n = 16 with m in [1, 4]; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size < 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6030
// k = 8, m = 4 with n in [1, 16]; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size < 17; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6046
// k in [1, 7] with m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6061
// k in [1, 7] with m = 4, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6077
// k in [1, 7], n in [1, 16], m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 17; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6097
// k in [9, 15] with m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6112
// k in [9, 15] with m = 4, n = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6128
// k in [9, 15], n in [1, 16], m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 17; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6148
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6163
// k in {16, 24, ..., 80} (step 8) with m = 4, n = 16 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6179
// k in {16, 24, ..., 80}, n in [1, 16], m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 17; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6199
// n in [17, 31] crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6216
// n in [17, 31] crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6234
// n in [17, 31] crossed with k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6252
// n in [17, 31], k in {1, 10, 19, 28, 37}, m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6272
// n in {32, 48} crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6289
// n in {32, 48} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6307
// n in {32, 48} crossed with k in {1, 10, 19, 28, 37}; m = 4, a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6325
// n in {32, 48}, k in {1, 10, 19, 28, 37}, m in [1, 4]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6345
// k in {1, 10, 19, 28, 37}, n in [1, 16], m in [1, 4]; cm_stride = 19, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 17; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6366
// m = 4, n = 16, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6380
// m = 4, n = 16, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6394
// m = 4, n = 16, k = 8 with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6408 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6409
6410
6411 #if XNN_ARCH_ARM
// m == mr (1), n == nr (2), k = 4; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
6424
// m = 1, n = 2, k = 4 with cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .cn_stride(5)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
6438
// m = 1, n = 2, k = 4 with a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .a_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
6452
// k = 4 with n in [1, 2] and m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 1; n_size < 3; ++n_size) {
    for (uint32_t m_size = 1; m_size < 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6470
// k = 4, n = 2 with m in [1, 1]; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_size = 1; m_size < 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(m_size).n(2).k(4)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6486
// k = 4, m = 1 with n in [1, 2]; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 1; n_size < 3; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(n_size).k(4)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6502
// k in [1, 3] with m = 1, n = 2.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6517
// k in [1, 3] with m = 1, n = 2 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .a_stride(7)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6533
// k in [1, 3], n in [1, 2], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6553
// k in [5, 7] with m = 1, n = 2.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6568
// k in [5, 7] with m = 1, n = 2 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6584
// k in [5, 7], n in [1, 2], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6604
// k in {8, 12, ..., 40} (step 4) with m = 1, n = 2.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6619
// k in {8, 12, ..., 40} (step 4) with m = 1, n = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_size)
      .a_stride(43)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
6635
// k in {8, 12, ..., 40}, n in [1, 2], m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6655
// n = 3 (the only value in [3, 4)) crossed with k in {1, 6, 11, 16}; m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6672
// n = 3 crossed with k in {1, 6, 11, 16}; m = 1, cn_stride = 5.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6690
// n = 3 crossed with k in {1, 6, 11, 16}; m = 1, a_stride = 23.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6708
// n = 3, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6728
// n in {4, 6} crossed with k in {1, 6, 11, 16}; m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6745
// n in {4, 6} crossed with k in {1, 6, 11, 16}; m = 1, cn_stride = 5.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6763
// n in {4, 6} crossed with k in {1, 6, 11, 16}; m = 1, a_stride = 23.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(23)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6781
// n in {4, 6}, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6801
// k in {1, 6, 11, 16}, n in [1, 2], m in [1, 1]; cm_stride = 5, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6822
// m = 1, n = 2, k = 4 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
6836
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  // One run: m = 1, n = 2, k = 4, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(4)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6850
TEST(QC8_GEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  // One run: m = 1, n = 2, k = 4, with cm_stride = 5.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(4)
      .cm_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
6864 #endif // XNN_ARCH_ARM
6865
6866
6867 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6880
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8, with cn_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6894
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8, with a_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6908
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [1, 8] and m = 1 (single-value m loop): k = 8, iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6926
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // The m loop has the single value m = 1: n = 8, k = 8, iterations = 1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6942
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [1, 8]: m = 1, k = 8, iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6958
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [1, 8): m = 1, n = 8.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6973
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [1, 8): m = 1, n = 8, with a_stride = 11.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6989
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [1, 8), each n in [1, 8] and m = 1: iterations = 1.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7009
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [9, 16): m = 1, n = 8.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7024
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [9, 16): m = 1, n = 8, with a_stride = 19.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7040
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [9, 16), each n in [1, 8] and m = 1: iterations = 1.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7060
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // For each k from 16 to 80 in steps of 8: m = 1, n = 8.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7075
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // For each k from 16 to 80 in steps of 8: m = 1, n = 8, with a_stride = 83.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7091
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k from 16 to 80 in steps of 8, each n in [1, 8] and m = 1:
  // iterations = 1.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7111
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7128
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1, with
  // cn_stride = 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7146
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1, with
  // a_stride = 43.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7164
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [9, 16), each k in {1, 10, 19, 28, 37} and m = 1:
  // iterations = 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7184
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7201
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1, with
  // cn_stride = 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7219
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1, with
  // a_stride = 43.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7237
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in {16, 24}, each k in {1, 10, 19, 28, 37} and m = 1:
  // iterations = 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7257
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in {1, 10, 19, 28, 37}, each n in [1, 8] and m = 1, with
  // cm_stride = 11 and iterations = 1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7278
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7292
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7306
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 8, with cm_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7321
7322
7323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7336
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8, with cn_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7350
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8, with a_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7364
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [1, 8] and m = 1 (single-value m loop): k = 8, iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7382
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // The m loop has the single value m = 1: n = 8, k = 8, iterations = 1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7398
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [1, 8]: m = 1, k = 8, iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7414
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [1, 8): m = 1, n = 8.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7429
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [1, 8): m = 1, n = 8, with a_stride = 11.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7445
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [1, 8), each n in [1, 8] and m = 1: iterations = 1.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7465
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [9, 16): m = 1, n = 8.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7480
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [9, 16): m = 1, n = 8, with a_stride = 19.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7496
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in [9, 16), each n in [1, 8] and m = 1: iterations = 1.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7516
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k from 16 to 80 in steps of 8: m = 1, n = 8.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7531
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k from 16 to 80 in steps of 8: m = 1, n = 8, with a_stride = 83.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7547
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k from 16 to 80 in steps of 8, each n in [1, 8] and m = 1:
  // iterations = 1.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7567
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7584
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1, with
  // cn_stride = 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7602
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [9, 16) and each k in {1, 10, 19, 28, 37}: m = 1, with
  // a_stride = 43.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7620
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in [9, 16), each k in {1, 10, 19, 28, 37} and m = 1:
  // iterations = 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7640
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7657
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1, with
  // cn_stride = 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7675
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in {16, 24} and each k in {1, 10, 19, 28, 37}: m = 1, with
  // a_stride = 43.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7693
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each n in {16, 24}, each k in {1, 10, 19, 28, 37} and m = 1:
  // iterations = 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7713
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For each k in {1, 10, 19, 28, 37}, each n in [1, 8] and m = 1, with
  // cm_stride = 11 and iterations = 1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7734
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7748
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7762
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run: m = 1, n = 8, k = 8, with cm_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7776 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7777
7778
7779 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 16.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7792
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 16, with cn_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7806
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // One run: m = 1, n = 8, k = 16, with a_stride = 19.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7820
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [1, 8] and m = 1 (single-value m loop): k = 16,
  // iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7838
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // The m loop has the single value m = 1: n = 8, k = 16, iterations = 1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7854
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // For each n in [1, 8]: m = 1, k = 16, iterations = 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7870
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [1, 16): m = 1, n = 8.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7885
// m = 1, n = 8, a_stride = 19; sweeps k over 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7901
// Sweeps k over 1..15, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7921
// m = 1, n = 8; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7936
// m = 1, n = 8, a_stride = 37; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7952
// Sweeps k over 17..31, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7972
// m = 1, n = 8; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7987
// m = 1, n = 8, a_stride = 163; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8003
// Sweeps k over 32..160 (step 16), n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8023
// m = 1; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8040
// m = 1, cn_stride = 11; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8058
// m = 1, a_stride = 83; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8076
// Sweeps n over 9..15, k over 1..80 (step 17) and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8096
// m = 1; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8113
// m = 1, cn_stride = 11; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8131
// m = 1, a_stride = 83; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8149
// Sweeps n over {16, 24}, k over 1..80 (step 17) and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8169
// cm_stride = 11; sweeps k over 1..80 (step 17), n over 1..8 and m over the single value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8190
// Single run: m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8204
// Single run: m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8218
// Single run: m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8233
8234
8235 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run: m = 1, n = 8, k = 16 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8248
// Single run: m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8262
// Single run: m = 1, n = 8, k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8276
// k = 16; sweeps n over 1..8 and m over the single value 1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(mc).n(nc).k(16).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8294
// k = 16, n = 8; sweeps m over the single value 1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(mc).n(8).k(16).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8310
// k = 16, m = 1; sweeps n over 1..8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(nc).k(16).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8326
// m = 1, n = 8; sweeps k over 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8341
// m = 1, n = 8, a_stride = 19; sweeps k over 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8357
// Sweeps k over 1..15, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8377
// m = 1, n = 8; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8392
// m = 1, n = 8, a_stride = 37; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8408
// Sweeps k over 17..31, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8428
// m = 1, n = 8; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8443
// m = 1, n = 8, a_stride = 163; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8459
// Sweeps k over 32..160 (step 16), n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8479
// m = 1; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8496
// m = 1, cn_stride = 11; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8514
// m = 1, a_stride = 83; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8532
// Sweeps n over 9..15, k over 1..80 (step 17) and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8552
// m = 1; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8569
// m = 1, cn_stride = 11; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8587
// m = 1, a_stride = 83; sweeps n over {16, 24} and k over 1..80 in steps of 17.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc).a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8605
// Sweeps n over {16, 24}, k over 1..80 (step 17) and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8625
// cm_stride = 11; sweeps k over 1..80 (step 17), n over 1..8 and m over the single value 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8646
// Single run: m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8660
// Single run: m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8674
// Single run: m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8688 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8689
8690
8691 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run: m = 1, n = 8, k = 16 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8704
// Single run: m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8718
// Single run: m = 1, n = 8, k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16).a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8732
// k = 16; sweeps n over 1..8 and m over the single value 1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(mc).n(nc).k(16).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8750
// k = 16, n = 8; sweeps m over the single value 1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(mc).n(8).k(16).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8766
// k = 16, m = 1; sweeps n over 1..8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(nc).k(16).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8782
// m = 1, n = 8; sweeps k over 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8797
// m = 1, n = 8, a_stride = 19; sweeps k over 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8813
// Sweeps k over 1..15, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8833
// m = 1, n = 8; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8848
// m = 1, n = 8, a_stride = 37; sweeps k over 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8864
// Sweeps k over 17..31, n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8884
// m = 1, n = 8; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8899
// m = 1, n = 8, a_stride = 163; sweeps k over 32..160 in steps of 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc).a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8915
// Sweeps k over 32..160 (step 16), n over 1..8 and m over the single value 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8935
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8952
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8970
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8988
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n from 9 through 15 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9008
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9025
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9043
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9061
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9081
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 18, 35, 52, 69 crossed with n = 1..8 (the m loop only visits m=1),
  // with cm_stride set to 11 and a single iteration per shape.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9102
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m=1, n=8, k=16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9116
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m=1, n=8, k=16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9130
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m=1, n=8, k=16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9144 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9145
9146
9147 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with no stride or clamp settings applied.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9160
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9174
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with a_stride set to 19.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9188
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k stays at 16 while n walks 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9206
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k=16 and n=8; the m loop only visits m=1. Single iteration per shape.
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9222
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k=16 and m=1 while n walks 1..8. Single iteration per shape.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9238
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 at m=1, n=8.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9253
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 at m=1, n=8, with a_stride set to 19.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9269
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9289
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 at m=1, n=8.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9304
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 at m=1, n=8, with a_stride set to 37.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9320
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9340
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (steps of 16) at m=1, n=8.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9355
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (steps of 16) at m=1, n=8, with a_stride set to 163.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_val)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9371
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9391
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9408
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9426
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9444
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9464
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9481
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9499
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9517
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9537
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1, 18, 35, 52, 69 crossed with n = 1..8 (the m loop only visits m=1),
  // with cm_stride set to 11 and a single iteration per shape.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9558
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9572
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9586
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9600 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9601
9602
9603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with no stride or clamp settings applied.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9616
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9630
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m=1, n=8, k=16 with a_stride set to 19.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9644
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k stays at 16 while n walks 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9662
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k=16 and n=8; the m loop only visits m=1. Single iteration per shape.
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9678
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k=16 and m=1 while n walks 1..8. Single iteration per shape.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9694
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 at m=1, n=8.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9709
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 at m=1, n=8, with a_stride set to 19.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9725
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 1 through 15 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9745
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 at m=1, n=8.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9760
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 at m=1, n=8, with a_stride set to 37.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9776
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k from 17 through 31 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9796
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (steps of 16) at m=1, n=8.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9811
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (steps of 16) at m=1, n=8, with a_stride set to 163.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9827
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 crossed with n = 1..8; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9847
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9864
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9882
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9900
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9920
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69 (steps of 17 up to 80), at m=1.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9937
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9955
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24, each with k = 1, 18, 35, 52, 69, at m=1 and a_stride set to 83.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9973
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 crossed with k = 1, 18, 35, 52, 69; the m loop only visits m=1.
  // Each shape is run for a single iteration.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9993
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 1] with
// cm_stride set to 11, running a single iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10014
// Single run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10028
// Single run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10042
// Single run with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10056 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10057
10058
10059 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=1, n=8, k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10072
// Single run with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10086
// Single run with m=1, n=8, k=16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10100
// Sweeps n in [1, 8] and m in [1, 1] at k=16, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10118
// Sweeps m in [1, 1] at n=8, k=16, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10134
// Sweeps n in [1, 8] at m=1, k=16, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10150
// Sweeps k in [1, 15] at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10165
// Sweeps k in [1, 15] at m=1, n=8 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10181
// Sweeps k in [1, 15], n in [1, 8] and m in [1, 1], one iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10201
// Sweeps k in [17, 31] at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10216
// Sweeps k in [17, 31] at m=1, n=8 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10232
// Sweeps k in [17, 31], n in [1, 8] and m in [1, 1], one iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10252
// Sweeps k over 32, 48, ..., 160 (step 16) at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10267
// Sweeps k over 32, 48, ..., 160 (step 16) at m=1, n=8 with a_stride set
// to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10283
// Sweeps k over 32, 48, ..., 160 (step 16), n in [1, 8] and m in [1, 1],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10303
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10320
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1 with cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10338
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1 with a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10356
// Sweeps n in [9, 15], k in {1, 18, 35, 52, 69} and m in [1, 1], one
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10376
// Runs the tester with m=1 for n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10393
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} at m=1 with cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10411
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} at m=1 with a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10429
// Sweeps n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 1], one
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10449
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 1] with
// cm_stride set to 11, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Single run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10484
// Single run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10498
// Single run with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10513
10514
10515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=1, n=8, k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10528
// Single run with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10542
// Single run with m=1, n=8, k=16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10556
// Sweeps n in [1, 8] and m in [1, 1] at k=16, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10574
// Sweeps m in [1, 1] at n=8, k=16, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10590
// Sweeps n in [1, 8] at m=1, k=16, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10606
// Sweeps k in [1, 15] at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10621
// Sweeps k in [1, 15] at m=1, n=8 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10637
// Sweeps k in [1, 15], n in [1, 8] and m in [1, 1], one iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10657
// Sweeps k in [17, 31] at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10672
// Sweeps k in [17, 31] at m=1, n=8 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10688
// Sweeps k in [17, 31], n in [1, 8] and m in [1, 1], one iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10708
// Sweeps k over 32, 48, ..., 160 (step 16) at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10723
// Sweeps k over 32, 48, ..., 160 (step 16) at m=1, n=8 with a_stride set
// to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10739
// Sweeps k over 32, 48, ..., 160 (step 16), n in [1, 8] and m in [1, 1],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10759
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10776
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1 with cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10794
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} at m=1 with a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10812
// Sweeps n in [9, 15], k in {1, 18, 35, 52, 69} and m in [1, 1], one
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10832
// Runs the tester with m=1 for n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10849
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} at m=1 with cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10867
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} at m=1 with a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10885
// Sweeps n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 1], one
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10905
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 1] with
// cm_stride set to 11, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10926
// Single run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10940
// Single run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10954
// Single run with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10968 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10969
10970
10971 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=1, n=16, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10984
// Single run with m=1, n=16, k=8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10998
// Single run with m=1, n=16, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
11012
// Sweeps n in [1, 16] and m in [1, 1] at k=8, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11030
// n=16, k=8; m is looped over its single value 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11046
// m=1, k=8; n runs over 1..16, with iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11062
// m=1, n=16; k runs over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11077
// m=1, n=16; k runs over 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11093
// k runs over 1..7, n over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11113
// m=1, n=16; k runs over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11128
// m=1, n=16; k runs over 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11144
// k runs over 9..15, n over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11164
// m=1, n=16; k takes the values 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11179
// m=1, n=16; k takes the values 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11195
// k takes 16, 24, ..., 80; n runs over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11215
// m=1; n runs over 17..31 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11232
// m=1; n runs over 17..31, k takes 1, 10, 19, 28, 37; cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11250
// m=1; n runs over 17..31, k takes 1, 10, 19, 28, 37; a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11268
// n runs over 17..31, k takes 1, 10, 19, 28, 37, m loops over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11288
// m=1; n takes 32 and 48 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11305
// m=1; n takes 32 and 48, k takes 1, 10, 19, 28, 37; cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11323
// m=1; n takes 32 and 48, k takes 1, 10, 19, 28, 37; a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11341
// n takes 32 and 48, k takes 1, 10, 19, 28, 37, m loops over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11361
// k takes 1, 10, 19, 28, 37, n runs over 1..16, m over its single value 1; cm_stride is 19 and iterations is 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11382
// m=1, n=16, k=8 problem with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11396
// m=1, n=16, k=8 problem with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11410
// m=1, n=16, k=8 problem with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11424 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11425
11426
11427 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the prfm kernel once on an m=1, n=16, k=8 problem with no further tester options set.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11440
// m=1, n=16, k=8 problem with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11454
// m=1, n=16, k=8 problem with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11468
// k fixed at 8; n runs over 1..16 and m over its single value 1, with iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11486
// n=16, k=8; m is looped over its single value 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11502
// m=1, k=8; n runs over 1..16, with iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11518
// m=1, n=16; k runs over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11533
// m=1, n=16; k runs over 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11549
// k runs over 1..7, n over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11569
// m=1, n=16; k runs over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11584
// m=1, n=16; k runs over 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11600
// k runs over 9..15, n over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11620
// m=1, n=16; k takes the values 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11635
// m=1, n=16; k takes the values 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11651
// k takes 16, 24, ..., 80; n runs over 1..16, m over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11671
// m=1; n runs over 17..31 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11688
// m=1; n runs over 17..31, k takes 1, 10, 19, 28, 37; cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11706
// m=1; n runs over 17..31, k takes 1, 10, 19, 28, 37; a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11724
// n runs over 17..31, k takes 1, 10, 19, 28, 37, m loops over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11744
// m=1; n takes 32 and 48 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11761
// m=1; n takes 32 and 48, k takes 1, 10, 19, 28, 37; cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11779
// m=1; n takes 32 and 48, k takes 1, 10, 19, 28, 37; a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11797
// n takes 32 and 48, k takes 1, 10, 19, 28, 37, m loops over its single value 1; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11817
// k takes 1, 10, 19, 28, 37, n runs over 1..16, m over its single value 1; cm_stride is 19 and iterations is 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11838
// m=1, n=16, k=8 problem with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11852
// m=1, n=16, k=8 problem with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11866
// m=1, n=16, k=8 problem with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11881
11882
11883 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the 1x16c4 neondot kernel once on an m=1, n=16, k=8 problem (kr=4) with no further tester options set.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11896
// m=1, n=16, k=8 problem (kr=4) with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11910
// m=1, n=16, k=8 problem (kr=4) with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
11924
// k fixed at 8 (kr=4); n runs over 1..16 and m over its single value 1, with iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11942
// n=16, k=8 (kr=4); m is looped over its single value 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11958
// m=1, k=8 (kr=4); n runs over 1..16, with iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11974
// m=1, n=16 (kr=4); k runs over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
11989
// m=1, n=16 (kr=4); k runs over 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12005
// k runs over 1..7, n over 1..16, m over its single value 1 (kr=4); iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12025
// 1x16c4 neondot kernel: m=1, n=16, sweeping k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12040
// 1x16c4 neondot kernel: m=1, n=16, sweeping k over [9, 15] with a_stride
// set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12056
// 1x16c4 neondot kernel: every combination of k in [9, 15], n in [1, 16] and
// m in [1, 1], with iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12076
// 1x16c4 neondot kernel: m=1, n=16, with k stepping by 8 from 16 up to 80.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12091
// 1x16c4 neondot kernel: m=1, n=16, with k stepping by 8 from 16 up to 80
// and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12107
// 1x16c4 neondot kernel: k stepping by 8 from 16 up to 80, combined with
// every n in [1, 16] and m in [1, 1]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12127
// 1x16c4 neondot kernel: m=1, n in [17, 31], with k stepping by 9 from 1 up
// to 40.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12144
// 1x16c4 neondot kernel: m=1, n in [17, 31], k stepping by 9 from 1 up to
// 40, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12162
// 1x16c4 neondot kernel: m=1, n in [17, 31], k stepping by 9 from 1 up to
// 40, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12180
// 1x16c4 neondot kernel: n in [17, 31], k stepping by 9 from 1 up to 40, and
// m in [1, 1]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12200
// 1x16c4 neondot kernel: m=1, n stepping by 16 from 32 up to 48, with k
// stepping by 9 from 1 up to 40.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12217
// 1x16c4 neondot kernel: m=1, n stepping by 16 from 32 up to 48, k stepping
// by 9 from 1 up to 40, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12235
// 1x16c4 neondot kernel: m=1, n stepping by 16 from 32 up to 48, k stepping
// by 9 from 1 up to 40, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12253
// 1x16c4 neondot kernel: n stepping by 16 from 32 up to 48, k stepping by 9
// from 1 up to 40, and m in [1, 1]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12273
// 1x16c4 neondot kernel: k stepping by 9 from 1 up to 40, n in [1, 16] and
// m in [1, 1], with cm_stride set to 19 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12294
// 1x16c4 neondot kernel: single m=1, n=16, k=8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12308
// 1x16c4 neondot kernel: single m=1, n=16, k=8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12322
// 1x16c4 neondot kernel: single m=1, n=16, k=8 run with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12336 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
12337
12338
12339 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8 neonv8_mlal_lane_prfm kernel: single run at m=2, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12352
// 2x8 neonv8_mlal_lane_prfm kernel: single m=2, n=8, k=8 run with cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12366
// 2x8 neonv8_mlal_lane_prfm kernel: single m=2, n=8, k=8 run with a_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12380
// 2x8 neonv8_mlal_lane_prfm kernel: k=8 with every n in [1, 8] and m in
// [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12398
// 2x8 neonv8_mlal_lane_prfm kernel: n=8, k=8, sweeping m over [1, 2] with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12414
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, k=8, sweeping n over [1, 8] with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12430
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, sweeping k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12445
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, sweeping k over [1, 7] with
// a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12461
// 2x8 neonv8_mlal_lane_prfm kernel: every combination of k in [1, 7],
// n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12481
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, sweeping k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12496
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, sweeping k over [9, 15] with
// a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12512
// 2x8 neonv8_mlal_lane_prfm kernel: every combination of k in [9, 15],
// n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12532
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, with k stepping by 8 from 16
// up to 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12547
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n=8, with k stepping by 8 from 16
// up to 80 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12563
// 2x8 neonv8_mlal_lane_prfm kernel: k stepping by 8 from 16 up to 80, with
// every n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12583
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n in [9, 15], with k stepping by 9
// from 1 up to 40.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12600
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n in [9, 15], k stepping by 9 from
// 1 up to 40, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12618
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n in [9, 15], k stepping by 9 from
// 1 up to 40, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12636
// 2x8 neonv8_mlal_lane_prfm kernel: n in [9, 15], k stepping by 9 from 1 up
// to 40, and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12656
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n stepping by 8 from 16 up to 24,
// with k stepping by 9 from 1 up to 40.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12673
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n stepping by 8 from 16 up to 24,
// k stepping by 9 from 1 up to 40, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12691
// 2x8 neonv8_mlal_lane_prfm kernel: m=2, n stepping by 8 from 16 up to 24,
// k stepping by 9 from 1 up to 40, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12709
// 2x8 neonv8_mlal_lane_prfm kernel: n stepping by 8 from 16 up to 24, k
// stepping by 9 from 1 up to 40, and m in [1, 2]; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12729
// 2x8 neonv8_mlal_lane_prfm kernel: k stepping by 9 from 1 up to 40,
// n in [1, 8] and m in [1, 2], with cm_stride set to 11 and iterations set
// to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12750
// 2x8 neonv8_mlal_lane_prfm kernel: single m=2, n=8, k=8 run with qmin set
// to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12764
// 2x8 neonv8_mlal_lane_prfm kernel: single m=2, n=8, k=8 run with qmax set
// to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12778
// 2x8 neonv8_mlal_lane_prfm kernel: single m=2, n=8, k=8 run with cm_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12792 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12793
12794
12795 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c2 neon_mlal_ld2r kernel: single run at m=2, n=8, k=16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12808
// 2x8c2 neon_mlal_ld2r kernel: single m=2, n=8, k=16 run with cn_stride set
// to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12822
// 2x8c2 neon_mlal_ld2r kernel: single m=2, n=8, k=16 run with a_stride set
// to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12836
// 2x8c2 neon_mlal_ld2r kernel: k=16 with every n in [1, 8] and m in [1, 2];
// iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12854
// 2x8c2 neon_mlal_ld2r kernel: n=8, k=16, sweeping m over [1, 2] with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12870
// 2x8c2 neon_mlal_ld2r kernel: m=2, k=16, sweeping n over [1, 8] with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12886
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, sweeping k over [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12901
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, sweeping k over [1, 15] with
// a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12917
// 2x8c2 neon_mlal_ld2r kernel: every combination of k in [1, 15],
// n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12937
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, sweeping k over [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12952
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, sweeping k over [17, 31] with
// a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(37)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12968
// 2x8c2 neon_mlal_ld2r kernel: every combination of k in [17, 31],
// n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12988
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, with k stepping by 16 from 32 up
// to 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13003
// 2x8c2 neon_mlal_ld2r kernel: m=2, n=8, with k stepping by 16 from 32 up
// to 160 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .a_stride(163)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13019
// 2x8c2 neon_mlal_ld2r kernel: k stepping by 16 from 32 up to 160, with
// every n in [1, 8] and m in [1, 2]; iterations set to 1 per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13039
// Runs m=2 with n in [9, 15] and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13056
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13074
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13092
// For n in [9, 15] and k stepping by 17 from 1 while k <= 80, runs m in
// [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13112
// Runs m=2 with n in {16, 24} and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13129
// Runs m=2 with n in {16, 24}, k stepping by 17 from 1 while k <= 80, and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13147
// Runs m=2 with n in {16, 24}, k stepping by 17 from 1 while k <= 80, and
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13165
// For n in {16, 24} and k stepping by 17 from 1 while k <= 80, runs m in
// [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13185
// For k stepping by 17 from 1 while k <= 80, runs every (m, n) with m in
// [1, 2] and n in [1, 8], with cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13206
// Single m=2, n=8, k=16 run with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13220
// Single m=2, n=8, k=16 run with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13234
// Single m=2, n=8, k=16 run with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13248 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13249
13250
13251 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=2, n=8, k=16 run.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13264
// Single m=2, n=8, k=16 run with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13278
// Single m=2, n=8, k=16 run with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13292
// With k=16, runs every (m, n) with m in [1, 2] and n in [1, 8], using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13310
// With n=8 and k=16, runs m in [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13326
// With m=2 and k=16, runs n in [1, 8] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(n).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13342
// Runs m=2, n=8 with k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13357
// Runs m=2, n=8 with k in [1, 15] and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13373
// Sweeps k over [1, 15]; for each k, runs every (m, n) with m in [1, 2] and
// n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13393
// Runs m=2, n=8 with k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13408
// Runs m=2, n=8 with k in [17, 31] and a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13424
// Sweeps k over [17, 31]; for each k, runs every (m, n) with m in [1, 2] and
// n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13444
// Runs m=2, n=8 with k stepping by 16 from 32 through 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13459
// Runs m=2, n=8 with k stepping by 16 from 32 through 160 and a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13475
// For k stepping by 16 from 32 through 160, runs every (m, n) with m in [1, 2]
// and n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13495
// Runs m=2 with n in [9, 15] and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13512
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13530
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13548
// For n in [9, 15] and k stepping by 17 from 1 while k <= 80, runs m in
// [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13568
// Runs m=2 with n in {16, 24} and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13585
// Runs m=2 with n in {16, 24}, k stepping by 17 from 1 while k <= 80, and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13603
// Runs m=2 with n in {16, 24}, k stepping by 17 from 1 while k <= 80, and
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13621
// For n in {16, 24} and k stepping by 17 from 1 while k <= 80, runs m in
// [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13641
// For k stepping by 17 from 1 while k <= 80, runs every (m, n) with m in
// [1, 2] and n in [1, 8], with cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13662
// Single m=2, n=8, k=16 run with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13676
// Single m=2, n=8, k=16 run with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13690
// Single m=2, n=8, k=16 run with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13705
13706
13707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=2, n=8, k=16 run.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13720
// Single m=2, n=8, k=16 run with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13734
// Single m=2, n=8, k=16 run with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13748
// With k=16, runs every (m, n) with m in [1, 2] and n in [1, 8], using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13766
// With n=8 and k=16, runs m in [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13782
// With m=2 and k=16, runs n in [1, 8] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(n).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13798
// Runs m=2, n=8 with k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13813
// Runs m=2, n=8 with k in [1, 15] and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13829
// Sweeps k over [1, 15]; for each k, runs every (m, n) with m in [1, 2] and
// n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13849
// Runs m=2, n=8 with k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13864
// Runs m=2, n=8 with k in [17, 31] and a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13880
// Sweeps k over [17, 31]; for each k, runs every (m, n) with m in [1, 2] and
// n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13900
// Runs m=2, n=8 with k stepping by 16 from 32 through 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13915
// Runs m=2, n=8 with k stepping by 16 from 32 through 160 and a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13931
// For k stepping by 16 from 32 through 160, runs every (m, n) with m in [1, 2]
// and n in [1, 8], using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13951
// Runs m=2 with n in [9, 15] and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13968
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13986
// Runs m=2 with n in [9, 15], k stepping by 17 from 1 while k <= 80, and
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14004
// For n in [9, 15] and k stepping by 17 from 1 while k <= 80, runs m in
// [1, 2] using iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14024
// Runs m=2 with n in {16, 24} and k stepping by 17 from 1 while k <= 80.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14041
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14059
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14077
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14097
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14118
// Single run at m = 2, n = 8, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14132
// Single run at m = 2, n = 8, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14146
// Single run at m = 2, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14161
14162
14163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14176
// Single run at m = 2, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14190
// Single run at m = 2, n = 8, k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14204
// Sweeps n over 1..8 and m over 1..2 at k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14222
// Sweeps m over 1..2 at n = 8, k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14238
// Sweeps n over 1..8 at m = 2, k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14254
// Sweeps k over 1..15 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14269
// Sweeps k over 1..15 at m = 2, n = 8 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14285
// Sweeps k over 1..15, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14305
// Sweeps k over 17..31 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14320
// Sweeps k over 17..31 at m = 2, n = 8 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14336
// Sweeps k over 17..31, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14356
// Sweeps k over 32..160 in steps of 16 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14371
// Sweeps k over 32..160 in steps of 16 at m = 2, n = 8 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14387
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14407
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14424
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14442
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14460
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14480
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14497
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14515
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14533
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14553
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14574
// Single run at m = 2, n = 8, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14588
// Single run at m = 2, n = 8, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14602
// Single run at m = 2, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14616 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14617
14618
14619 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14632
// Single run at m = 2, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14646
// Single run at m = 2, n = 8, k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .a_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14660
// Sweeps n over 1..8 and m over 1..2 at k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14678
// Sweeps m over 1..2 at n = 8, k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14694
// Sweeps n over 1..8 at m = 2, k = 16, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14710
// Sweeps k over 1..15 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14725
// Sweeps k over 1..15 at m = 2, n = 8 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14741
// Sweeps k over 1..15, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14761
// Sweeps k over 17..31 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14776
// Sweeps k over 17..31 at m = 2, n = 8 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .a_stride(37)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14792
// Sweeps k over 17..31, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14812
// Sweeps k over 32..160 in steps of 16 at m = 2, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14827
// Sweeps k over 32..160 in steps of 16 at m = 2, n = 8 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_dim)
        .a_stride(163)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14843
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14863
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14880
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14898
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14916
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14936
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14953
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14971
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(83)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14989
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15009
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15030
// Single run at m = 2, n = 8, k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15044
// Single run at m = 2, n = 8, k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15058
// Single run at m = 2, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15072 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15073
15074
15075 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs one m=2, n=8 problem with k=16 on the 2x8c4s2 NEONv8 MLAL kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15088
// Runs one m=2, n=8, k=16 problem with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15102
// Runs one m=2, n=8, k=16 problem with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16).a_stride(19);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15116
// Sweeps n over [1, 8] and m over [1, 2] at k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15134
// Sweeps m over [1, 2] with n=8 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(rows).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15150
// Sweeps n over [1, 8] with m=2 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(cols).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15166
// Sweeps k over [1, 15] with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15181
// Sweeps k over [1, 15] with m=2, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15197
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 2], one iteration per
// shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15217
// Sweeps k over [17, 31] with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15232
// Sweeps k over [17, 31] with m=2, n=8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth).a_stride(37);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15248
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 2], one iteration per
// shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15268
// Sweeps k over the multiples of 16 from 32 to 160 with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15283
// Sweeps k over the multiples of 16 from 32 to 160 with m=2, n=8 and a_stride
// set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(depth).a_stride(163);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
15299
// Sweeps k over the multiples of 16 from 32 to 160, n over [1, 8] and m over
// [1, 2], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15319
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15336
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15354
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth).a_stride(83);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15372
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 2], one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15392
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15409
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15427
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(cols).k(depth).a_stride(83);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15445
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 2], one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15465
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 2] with
// cm_stride set to 11, running a single iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15486
// Runs one m=2, n=8, k=16 problem with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15500
// Runs one m=2, n=8, k=16 problem with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15514
// Runs one m=2, n=8, k=16 problem with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
15528 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15529
15530
15531 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs one m=2, n=8 problem with k=16 on the 2x8c8 NEON MLAL kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15544
// Runs one m=2, n=8, k=16 problem with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15558
// Runs one m=2, n=8, k=16 problem with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).a_stride(19);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15572
// Sweeps n over [1, 8] and m over [1, 2] at k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15590
// Sweeps m over [1, 2] with n=8 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15606
// Sweeps n over [1, 8] with m=2 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(cols).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15622
// Sweeps k over [1, 15] with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15637
// Sweeps k over [1, 15] with m=2, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15653
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 2], one iteration per
// shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15673
// Sweeps k over [17, 31] with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15688
// Sweeps k over [17, 31] with m=2, n=8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth).a_stride(37);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15704
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 2], one iteration per
// shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15724
// Sweeps k over the multiples of 16 from 32 to 160 with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15739
// Sweeps k over the multiples of 16 from 32 to 160 with m=2, n=8 and a_stride
// set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth).a_stride(163);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
  }
}
15755
// Sweeps k over the multiples of 16 from 32 to 160, n over [1, 8] and m over
// [1, 2], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15775
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15792
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15810
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m=2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).a_stride(83);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15828
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 2], one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15848
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15865
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15883
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).a_stride(83);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
15901
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 2], one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15921
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 2] with
// cm_stride set to 11, running a single iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
                    xnn_init_qc8_conv_minmax_fp32_neon_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
15942
// Runs one m=2, n=8, k=16 problem with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15956
// Runs one m=2, n=8, k=16 problem with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15970
// Runs one m=2, n=8, k=16 problem with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
}
15984 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15985
15986
15987 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs one m=2, n=8 problem with k=16 on the 2x8c8 NEONv8 MLAL kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
16000
// Runs one m=2, n=8, k=16 problem with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
16014
// Runs one m=2, n=8, k=16 problem with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).a_stride(19);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
16028
// Sweeps n over [1, 8] and m over [1, 2] at k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
16046
// Sweeps m over [1, 2] with n=8 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
16062
// Sweeps n over [1, 8] with m=2 and k=16, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(cols).k(16).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
16078
// Sweeps k over [1, 15] with m=2 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
16093
// Sweeps k over [1, 15] with m=2, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(depth).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
16109
// Sweeps k over 1..15, n over 1..8 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16129
// Sweeps k over 17..31 with m = 2 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16144
// Sweeps k over 17..31 with m = 2, n = 8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16160
// Sweeps k over 17..31, n over 1..8 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16180
// Sweeps k over 32..160 in steps of 16 with m = 2 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16195
// Sweeps k over 32..160 in steps of 16 with m = 2, n = 8 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16211
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..2
// (k outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// Sweeps n over 9..15 and, for each n, k over 1..80 in steps of 17, with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16248
// Sweeps n over 9..15 and, for each n, k over 1..80 in steps of 17,
// with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16266
// Sweeps n over 9..15 and, for each n, k over 1..80 in steps of 17,
// with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16284
// Sweeps n over 9..15, k over 1..80 in steps of 17 and m over 1..2
// (n outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16304
// Runs n = 16 and n = 24 and, for each n, k over 1..80 in steps of 17, with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16321
// Runs n = 16 and n = 24 and, for each n, k over 1..80 in steps of 17,
// with m = 2 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16339
// Runs n = 16 and n = 24 and, for each n, k over 1..80 in steps of 17,
// with m = 2 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16357
// Runs n = 16 and n = 24, k over 1..80 in steps of 17 and m over 1..2
// (n outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16377
// Sweeps k over 1..80 in steps of 17, n over 1..8 and m over 1..2
// (k outermost, m innermost) with cm_stride set to 11, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16398
// Single run with m = 2, n = 8, k = 16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16412
// Single run with m = 2, n = 8, k = 16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16426
// Single run with m = 2, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16440 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16441
16442
16443 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 16 and k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16456
// Single run with m = 2, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16470
// Single run with m = 2, n = 16, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16484
// Sweeps n over 1..16 and, for each n, m over 1..2, with k = 8;
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16502
// Sweeps m over 1..2 with n = 16 and k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16518
// Sweeps n over 1..16 with m = 2 and k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16534
// Sweeps k over 1..7 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16549
// Sweeps k over 1..7 with m = 2, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16565
// Sweeps k over 1..7, n over 1..16 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16585
// Sweeps k over 9..15 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16600
// Sweeps k over 9..15 with m = 2, n = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16616
// Sweeps k over 9..15, n over 1..16 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16636
// Sweeps k over 16..80 in steps of 8 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16651
// Sweeps k over 16..80 in steps of 8 with m = 2, n = 16 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16667
// Sweeps k over 16..80 in steps of 8, n over 1..16 and m over 1..2
// (k outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16687
// Sweeps n over 17..31 and, for each n, k over 1..40 in steps of 9, with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16704
// Sweeps n over 17..31 and, for each n, k over 1..40 in steps of 9,
// with m = 2 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16722
// Sweeps n over 17..31 and, for each n, k over 1..40 in steps of 9,
// with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16740
// Sweeps n over 17..31, k over 1..40 in steps of 9 and m over 1..2
// (n outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16760
// Runs n = 32 and n = 48 and, for each n, k over 1..40 in steps of 9, with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16777
// Runs n = 32 and n = 48 and, for each n, k over 1..40 in steps of 9,
// with m = 2 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16795
// Runs n = 32 and n = 48 and, for each n, k over 1..40 in steps of 9,
// with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16813
// Runs n = 32 and n = 48, k over 1..40 in steps of 9 and m over 1..2
// (n outermost, m innermost), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16833
// Sweeps k over 1..40 in steps of 9, n over 1..16 and m over 1..2
// (k outermost, m innermost) with cm_stride set to 19, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16854
// Single run with m = 2, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16868
// Single run with m = 2, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16882
// Single run with m = 2, n = 16, k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16897
16898
16899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 16 and k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16912
// Single run with m = 2, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16926
// Single run with m = 2, n = 16, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16940
// Sweeps n over 1..16 and, for each n, m over 1..2, with k = 8;
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16958
// Sweeps m over 1..2 with n = 16 and k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16974
// Sweeps n over 1..16 with m = 2 and k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16990
// Sweeps k over 1..7 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17005
// Sweeps k over 1..7 with m = 2, n = 16 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17021
// Sweeps k over 1..7, n over 1..16 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17041
// Sweeps k over 9..15 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17056
// Sweeps k over 9..15 with m = 2, n = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17072
// Sweeps k over 9..15, n over 1..16 and m over 1..2 (k outermost, m innermost),
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17092
// Sweeps k over 16..80 in steps of 8 with m = 2 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17107
// Sweeps k over 16..80 in steps of 8 with m = 2, n = 16 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17123
// For each k = 16, 24, ..., 80, runs every n = 1..16 and m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17143
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17160
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17178
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17196
// For each n = 17..31 and k = 1, 10, 19, 28, 37, runs m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17216
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17233
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17251
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17269
// For n = 32 and 48 and k = 1, 10, 19, 28, 37, runs m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17289
// For k = 1, 10, 19, 28, 37, runs every n = 1..16 and m = 1..2 with
// cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17310
// Single run at m=2, n=16, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17324
// Single run at m=2, n=16, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17338
// Single run at m=2, n=16, k=8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17352 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17353
17354
17355 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x16 neonv8_mlal_lane_prfm kernel at m=2, n=16, k=8.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17368
// Single run at m=2, n=16, k=8 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17382
// Single run at m=2, n=16, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17396
// At k=8, runs every n = 1..16 and m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17414
// At k=8 and n=16, runs m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17430
// At k=8 and m=2, runs n = 1..16 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17446
// Runs the kernel for k = 1..7 at m=2, n=16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17461
// Runs the kernel for k = 1..7 at m=2, n=16 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17477
// For each k = 1..7, runs every n = 1..16 and m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17497
// Runs the kernel for k = 9..15 at m=2, n=16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17512
// Runs the kernel for k = 9..15 at m=2, n=16 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17528
// For each k = 9..15, runs every n = 1..16 and m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17548
// Runs the kernel for k = 16, 24, ..., 80 at m=2, n=16.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17563
// Runs the kernel for k = 16, 24, ..., 80 at m=2, n=16 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(kc)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17579
// For each k = 16, 24, ..., 80, runs every n = 1..16 and m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17599
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17616
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17634
// For each n = 17..31, runs k = 1, 10, 19, 28, 37 at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17652
// For each n = 17..31 and k = 1, 10, 19, 28, 37, runs m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17689
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17707
// For n = 32 and 48, runs k = 1, 10, 19, 28, 37 at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17725
// For n = 32 and 48 and k = 1, 10, 19, 28, 37, runs m = 1..2 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17745
// For k = 1, 10, 19, 28, 37, runs every n = 1..16 and m = 1..2 with
// cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17766
// Single run at m=2, n=16, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17780
// Single run at m=2, n=16, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17794
// Single run at m=2, n=16, k=8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17809
17810
17811 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 3x8 neon_mlal_lane kernel at m=3, n=8, k=8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17824
// Single run at m=3, n=8, k=8 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17838
// Single run at m=3, n=8, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17852
// At k=8, runs every n = 1..8 and m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17870
// At k=8 and n=8, runs m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17886
// At k=8 and m=3, runs n = 1..8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17902
// Runs the kernel for k = 1..7 at m=3, n=8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17917
// Runs the kernel for k = 1..7 at m=3, n=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17933
// For each k = 1..7, runs every n = 1..8 and m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17953
// Runs the kernel for k = 9..15 at m=3, n=8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17968
// Runs the kernel for k = 9..15 at m=3, n=8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17984
// For each k = 9..15, runs every n = 1..8 and m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18004
// Runs the kernel for k = 16, 24, ..., 80 at m=3, n=8.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18019
// Runs the kernel for k = 16, 24, ..., 80 at m=3, n=8 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18035
// For each k = 16, 24, ..., 80, runs every n = 1..8 and m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18055
// For each n = 9..15, runs k = 1, 10, 19, 28, 37 at m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18072
// For each n = 9..15, runs k = 1, 10, 19, 28, 37 at m=3 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18090
// For each n = 9..15, runs k = 1, 10, 19, 28, 37 at m=3 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18108
// For each n = 9..15 and k = 1, 10, 19, 28, 37, runs m = 1..3 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18128
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; no stride overrides.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18145
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; cn_stride fixed at 11.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18163
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; a_stride fixed at 43.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18181
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18201
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k_size in {1, 10, 19, 28, 37}; n_size = 1..8; m_size = 1..3; cm_stride fixed at 11, iterations(1).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18222
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 8, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18236
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 8, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18250
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 8, k = 8 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18264 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
18265
18266
18267 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8; no stride overrides.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18280
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18294
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18308
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8; n_size = 1..8; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18326
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 8, k = 8; m_size = 1..3; iterations(1) per value.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18342
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, k = 8; n_size = 1..8; iterations(1) per value.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18358
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 1..7; no stride overrides.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18373
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 1..7; a_stride fixed at 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18389
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k_size = 1..7; n_size = 1..8; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18409
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 9..15; no stride overrides.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18424
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 9..15; a_stride fixed at 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18440
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k_size = 9..15; n_size = 1..8; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18460
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 16, 24, ..., 80; no stride overrides.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18475
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n = 8; k_size = 16, 24, ..., 80; a_stride fixed at 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
18491
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k_size = 16, 24, ..., 80; n_size = 1..8; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18511
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size = 9..15; k_size in {1, 10, 19, 28, 37}; no stride overrides.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18528
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size = 9..15; k_size in {1, 10, 19, 28, 37}; cn_stride fixed at 11.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18546
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size = 9..15; k_size in {1, 10, 19, 28, 37}; a_stride fixed at 43.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18564
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n_size = 9..15; k_size in {1, 10, 19, 28, 37}; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18584
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; no stride overrides.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18601
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; cn_stride fixed at 11.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18619
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3; n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; a_stride fixed at 43.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18637
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n_size in {16, 24}; k_size in {1, 10, 19, 28, 37}; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18657
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k_size in {1, 10, 19, 28, 37}; n_size = 1..8; m_size = 1..3; cm_stride fixed at 11, iterations(1).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18678
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18692
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18706
TEST(QC8_GEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m = 3, n = 8, k = 8 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
18720 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
18721
18722
18723 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 16, k = 8; no stride overrides.
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18736
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 16, k = 8 with cn_stride set to 19.
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18750
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m = 3, n = 16, k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
18764
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8; n_size = 1..16; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18782
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16, k = 8; m_size = 1..3; iterations(1) per value.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18798
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, k = 8; n_size = 1..16; iterations(1) per value.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18814
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 1..7; no stride overrides.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18829
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 1..7; a_stride fixed at 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18845
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k_size = 1..7; n_size = 1..16; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18865
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 9..15; no stride overrides.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18880
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 9..15; a_stride fixed at 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18896
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k_size = 9..15; n_size = 1..16; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18916
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 16, 24, ..., 80; no stride overrides.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18931
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, n = 16; k_size = 16, 24, ..., 80; a_stride fixed at 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
18947
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k_size = 16, 24, ..., 80; n_size = 1..16; m_size = 1..3; iterations(1) per combination.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18967
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size = 17..31; k_size in {1, 10, 19, 28, 37}; no stride overrides.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18984
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size = 17..31; k_size in {1, 10, 19, 28, 37}; cn_stride fixed at 19.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19002
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size = 17..31; k_size in {1, 10, 19, 28, 37}; a_stride fixed at 43.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19020
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n_size = 17..31; k_size in {1, 10, 19, 28, 37}; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19040
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {32, 48}; k_size in {1, 10, 19, 28, 37}; no stride overrides.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19057
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {32, 48}; k_size in {1, 10, 19, 28, 37}; cn_stride fixed at 19.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19075
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3; n_size in {32, 48}; k_size in {1, 10, 19, 28, 37}; a_stride fixed at 43.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19093
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n_size in {32, 48}; k_size in {1, 10, 19, 28, 37}; m_size = 1..3; iterations(1) per combination.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19113
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k_size in {1, 10, 19, 28, 37}; n_size = 1..16; m_size = 1..3; cm_stride fixed at 19, iterations(1).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19134
// Single run with m = 3, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
19148
// Single run with m = 3, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
19162
// Single run with m = 3, n = 16, k = 8 and cm_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
19176 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19177
19178
19179 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 3, n = 16, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19192
// Single run with m = 3, n = 16, k = 8 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19206
// Single run with m = 3, n = 16, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19220
// Sweeps n over 1..16 and m over 1..3 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19238
// Sweeps m over 1..3 with n = 16 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(mc).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19254
// Sweeps n over 1..16 with m = 3 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19270
// Sweeps k over 1..7 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19285
// Sweeps k over 1..7 with m = 3, n = 16 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19301
// Sweeps k over 1..7, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19321
// Sweeps k over 9..15 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19336
// Sweeps k over 9..15 with m = 3, n = 16 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19352
// Sweeps k over 9..15, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19372
// Sweeps k over 16, 24, ..., 80 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19387
// Sweeps k over 16, 24, ..., 80 with m = 3, n = 16 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19403
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19423
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19440
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19458
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19476
// Sweeps n over 17..31, k over 1, 10, ..., 37 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19496
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19513
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19531
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19549
// Sweeps n over {32, 48}, k over 1, 10, ..., 37 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19569
// Sweeps k over 1, 10, ..., 37, n over 1..16 and m over 1..3 with cm_stride = 19,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19590
// Single run with m = 3, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19604
// Single run with m = 3, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19618
// Single run with m = 3, n = 16, k = 8 and cm_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19632 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19633
19634
19635 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 3, n = 16, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19648
// Single run with m = 3, n = 16, k = 8 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19662
// Single run with m = 3, n = 16, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19676
// Sweeps n over 1..16 and m over 1..3 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19694
// Sweeps m over 1..3 with n = 16 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(mc).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19710
// Sweeps n over 1..16 with m = 3 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19726
// Sweeps k over 1..7 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19741
// Sweeps k over 1..7 with m = 3, n = 16 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19757
// Sweeps k over 1..7, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19777
// Sweeps k over 9..15 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19792
// Sweeps k over 9..15 with m = 3, n = 16 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19808
// Sweeps k over 9..15, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19828
// Sweeps k over 16, 24, ..., 80 with m = 3 and n = 16.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19843
// Sweeps k over 16, 24, ..., 80 with m = 3, n = 16 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kc).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19859
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19879
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19896
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19914
// Sweeps n over 17..31 and k over 1, 10, ..., 37 with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19932
// Sweeps n over 17..31, k over 1, 10, ..., 37 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19952
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19969
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3 and cn_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19987
// Sweeps n over {32, 48} and k over 1, 10, ..., 37 with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20005
// Sweeps n over {32, 48}, k over 1, 10, ..., 37 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20025
// Sweeps k over 1, 10, ..., 37, n over 1..16 and m over 1..3 with cm_stride = 19,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20046
// Single run with m = 3, n = 16, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20060
// Single run with m = 3, n = 16, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20074
// Single run with m = 3, n = 16, k = 8 and cm_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20088 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20089
20090
20091 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run with m = 4, n = 8, k = 8 (mr = 4, nr = 8, kr = 4).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20104
// Single run with m = 4, n = 8, k = 8 and cn_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8).cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20118
// Single run with m = 4, n = 8, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20132
// 4x8c4 NEONDOT kernel: k=8, sweeping n over [1, 8] (outer) and m over [1, 4] (inner),
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20150
// 4x8c4 NEONDOT kernel: n=8, k=8, sweeping m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20166
// 4x8c4 NEONDOT kernel: m=4, k=8, sweeping n over [1, 8], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20182
// 4x8c4 NEONDOT kernel: m=4, n=8, sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20197
// 4x8c4 NEONDOT kernel: m=4, n=8, a_stride(11), sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20213
// 4x8c4 NEONDOT kernel: sweeps k over [1, 8), n over [1, 8], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20233
// 4x8c4 NEONDOT kernel: m=4, n=8, sweeping k over [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20248
// 4x8c4 NEONDOT kernel: m=4, n=8, a_stride(19), sweeping k over [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20264
// 4x8c4 NEONDOT kernel: sweeps k over [9, 16), n over [1, 8], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20284
// 4x8c4 NEONDOT kernel: m=4, n=8, sweeping k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20299
// 4x8c4 NEONDOT kernel: m=4, n=8, a_stride(83), sweeping k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20315
// 4x8c4 NEONDOT kernel: sweeps k over 16..80 (step 8), n over [1, 8], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20335
// 4x8c4 NEONDOT kernel: m=4, sweeping n over [9, 16) (outer) and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20352
// 4x8c4 NEONDOT kernel: m=4, cn_stride(11), sweeping n over [9, 16) (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20370
// 4x8c4 NEONDOT kernel: m=4, a_stride(43), sweeping n over [9, 16) (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20388
// 4x8c4 NEONDOT kernel: sweeps n over [9, 16), k over 1..40 (step 9), m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20408
// 4x8c4 NEONDOT kernel: m=4, sweeping n over 16..24 in steps of 8 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20425
// 4x8c4 NEONDOT kernel: m=4, cn_stride(11), sweeping n over 16..24 in steps of 8 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20443
// 4x8c4 NEONDOT kernel: m=4, a_stride(43), sweeping n over 16..24 in steps of 8 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20461
// 4x8c4 NEONDOT kernel: sweeps n over 16..24 (step 8), k over 1..40 (step 9), m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20481
// 4x8c4 NEONDOT kernel: cm_stride(11), sweeping k over 1..40 (step 9), n over [1, 8],
// m over [1, 4] (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20502
// 4x8c4 NEONDOT kernel: m=4, n=8, k=8 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20516
// 4x8c4 NEONDOT kernel: m=4, n=8, k=8 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20530
// 4x8c4 NEONDOT kernel: m=4, n=8, k=8 with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20544 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
20545
20546
20547 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8, no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20560
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20574
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20588
// 4x16 NEONv8 MLAL-lane PRFM kernel: k=8, sweeping n over [1, 16] (outer) and m over [1, 4] (inner),
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20606
// 4x16 NEONv8 MLAL-lane PRFM kernel: n=16, k=8, sweeping m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20622
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, k=8, sweeping n over [1, 16], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20638
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20653
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, a_stride(11), sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20669
// 4x16 NEONv8 MLAL-lane PRFM kernel: sweeps k over [1, 8), n over [1, 16], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20689
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, sweeping k over [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20704
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, a_stride(19), sweeping k over [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20720
// 4x16 NEONv8 MLAL-lane PRFM kernel: sweeps k over [9, 16), n over [1, 16], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20740
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, sweeping k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20755
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, a_stride(83), sweeping k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
20771
// 4x16 NEONv8 MLAL-lane PRFM kernel: sweeps k over 16..80 (step 8), n over [1, 16], m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20791
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, sweeping n over [17, 32) (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20808
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, cn_stride(19), sweeping n over [17, 32) (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20826
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, a_stride(43), sweeping n over [17, 32) (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20844
// 4x16 NEONv8 MLAL-lane PRFM kernel: sweeps n over [17, 32), k over 1..40 (step 9), m over [1, 4]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20864
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, sweeping n over 32..48 in steps of 16 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20881
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, cn_stride(19), sweeping n over 32..48 in steps of 16 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20899
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, a_stride(43), sweeping n over 32..48 in steps of 16 (outer)
// and k over 1..40 in steps of 9 (inner).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20917
// 4x16 NEONv8 MLAL-lane PRFM kernel: sweeps n over 32..48 (step 16), k over 1..40 (step 9),
// m over [1, 4] (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20937
// 4x16 NEONv8 MLAL-lane PRFM kernel: cm_stride(19), sweeping k over 1..40 (step 9), n over [1, 16],
// m over [1, 4] (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20958
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20972
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
20986
// 4x16 NEONv8 MLAL-lane PRFM kernel: m=4, n=16, k=8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21000 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21001
21002
21003 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 8x8c4 NEONDOT kernel: m=8, n=8, k=8, no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21016
// 8x8c4 NEONDOT kernel: m=8, n=8, k=8 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21030
// 8x8c4 NEONDOT kernel: m=8, n=8, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21044
// 8x8c4 NEONDOT kernel: k=8, sweeping n over [1, 8] (outer) and m over [1, 8] (inner),
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21062
// 8x8c4 NEONDOT kernel: n=8, k=8, sweeping m over [1, 8], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21078
// 8x8c4 NEONDOT kernel: m=8, k=8, sweeping n over [1, 8], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21094
// 8x8c4 NEONDOT kernel: m=8, n=8, sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21109
// 8x8c4 NEONDOT kernel: m=8, n=8, a_stride(11), sweeping k over [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21125
// 8x8c4 NEONDOT kernel: sweeps k over [1, 8), n over [1, 8], m over [1, 8]
// (outermost to innermost), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21145
// 8x8c4 NEONDOT kernel on a full 8x8 tile, sweeping k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21160
// 8x8c4 NEONDOT kernel on a full 8x8 tile, k in 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21176
// 8x8c4 NEONDOT kernel over every (k, n, m) with k in 9..15, n in 1..8, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21196
// 8x8c4 NEONDOT kernel on a full 8x8 tile, k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21211
// 8x8c4 NEONDOT kernel on a full 8x8 tile, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21227
// 8x8c4 NEONDOT kernel over every (k, n, m) with k = 16, 24, ..., 80, n in 1..8, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21247
// 8x8c4 NEONDOT kernel with m=8, n in 9..15, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21264
// 8x8c4 NEONDOT kernel with m=8, n in 9..15, k = 1, 10, ..., 37, and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21282
// 8x8c4 NEONDOT kernel with m=8, n in 9..15, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21300
// 8x8c4 NEONDOT kernel over every (n, k, m) with n in 9..15, k = 1, 10, ..., 37, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21320
// 8x8c4 NEONDOT kernel with m=8, n = 16 and 24, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21337
// 8x8c4 NEONDOT kernel with m=8, n = 16 and 24, k = 1, 10, ..., 37, and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21355
// 8x8c4 NEONDOT kernel with m=8, n = 16 and 24, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21373
// 8x8c4 NEONDOT kernel over every (n, k, m) with n = 16 and 24, k = 1, 10, ..., 37, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21393
// 8x8c4 NEONDOT kernel over every (k, n, m) with k = 1, 10, ..., 37, n in 1..8, m in 1..8,
// with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21414
// Single full-tile run (m=8, n=8, k=8) of the 8x8c4 NEONDOT kernel with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21428
// Single full-tile run (m=8, n=8, k=8) of the 8x8c4 NEONDOT kernel with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21442
// Single full-tile run (m=8, n=8, k=8) of the 8x8c4 NEONDOT kernel with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21456 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
21457
21458
21459 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21472
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21486
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21500
// 8x16c4 NEONDOT kernel at k=8 over every (n, m) with n in 1..16 and m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21518
// 8x16c4 NEONDOT kernel at k=8 and n=16, sweeping m over 1..8 (one iteration per case).
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21534
// 8x16c4 NEONDOT kernel at k=8 and m=8, sweeping n over 1..16 (one iteration per case).
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21550
// 8x16c4 NEONDOT kernel on a full 8x16 tile, sweeping k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21565
// 8x16c4 NEONDOT kernel on a full 8x16 tile, k in 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21581
// 8x16c4 NEONDOT kernel over every (k, n, m) with k in 1..7, n in 1..16, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21601
// 8x16c4 NEONDOT kernel on a full 8x16 tile, sweeping k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21616
// 8x16c4 NEONDOT kernel on a full 8x16 tile, k in 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21632
// 8x16c4 NEONDOT kernel over every (k, n, m) with k in 9..15, n in 1..16, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21652
// 8x16c4 NEONDOT kernel on a full 8x16 tile, k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21667
// 8x16c4 NEONDOT kernel on a full 8x16 tile, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
21683
// 8x16c4 NEONDOT kernel over every (k, n, m) with k = 16, 24, ..., 80, n in 1..16, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21703
// 8x16c4 NEONDOT kernel with m=8, n in 17..31, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21720
// 8x16c4 NEONDOT kernel with m=8, n in 17..31, k = 1, 10, ..., 37, and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21738
// 8x16c4 NEONDOT kernel with m=8, n in 17..31, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21756
// 8x16c4 NEONDOT kernel over every (n, k, m) with n in 17..31, k = 1, 10, ..., 37, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21776
// 8x16c4 NEONDOT kernel with m=8, n = 32 and 48, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21793
// 8x16c4 NEONDOT kernel with m=8, n = 32 and 48, k = 1, 10, ..., 37, and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21811
// 8x16c4 NEONDOT kernel with m=8, n = 32 and 48, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21829
// 8x16c4 NEONDOT kernel over every (n, k, m) with n = 32 and 48, k = 1, 10, ..., 37, m in 1..8.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21849
// 8x16c4 NEONDOT kernel over every (k, n, m) with k = 1, 10, ..., 37, n in 1..16, m in 1..8,
// with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21870
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21884
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21898
// Single full-tile run (m=8, n=16, k=8) of the 8x16c4 NEONDOT kernel with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
21912 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
21913
21914
21915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile run (m=1, n=4, k=8) of the 1x4c2 SSE2 LD64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21928
// Single full-tile run (m=1, n=4, k=8) of the 1x4c2 SSE2 LD64 kernel with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21942
// Single full-tile run (m=1, n=4, k=8) of the 1x4c2 SSE2 LD64 kernel with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21956
// 1x4c2 SSE2 LD64 kernel at k=8 over every (n, m) with n in 1..4 and m in 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    // mr is 1, so this loop runs exactly once (m == 1).
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21974
// 1x4c2 SSE2 LD64 kernel at k=8 and n=4, sweeping m over 1..1 (a single case, since mr is 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
21990
// 1x4c2 SSE2 LD64 kernel at k=8 and m=1, sweeping n over 1..4 (one iteration per case).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22006
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, sweeping k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22021
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, k in 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22037
// 1x4c2 SSE2 LD64 kernel over every (k, n, m) with k in 1..7, n in 1..4, m in 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      // mr is 1, so this loop runs exactly once (m == 1).
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22057
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, sweeping k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22072
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, k in 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22088
// 1x4c2 SSE2 LD64 kernel over every (k, n, m) with k in 9..15, n in 1..4, m in 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      // mr is 1, so this loop runs exactly once (m == 1).
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22108
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22123
// 1x4c2 SSE2 LD64 kernel on a full 1x4 tile, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22139
// 1x4c2 SSE2 LD64 kernel over every (k, n, m) with k = 16, 24, ..., 80, n in 1..4, m in 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      // mr is 1, so this loop runs exactly once (m == 1).
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22159
// 1x4c2 SSE2 LD64 kernel with m=1, n in 5..7, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22176
// 1x4c2 SSE2 LD64 kernel with m=1, n in 5..7, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22194
// Runs the 1x4c2 SSE2 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22212
// Runs the 1x4c2 SSE2 ld64 kernel for n = 5..7, k = 1, 10, ..., 37 and
// 1 <= m <= 1, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22232
// Runs the 1x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22249
// Runs the 1x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22267
// Runs the 1x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22285
// Runs the 1x4c2 SSE2 ld64 kernel for n = 8 and 12, k = 1, 10, ..., 37 and
// 1 <= m <= 1, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22305
// Runs the 1x4c2 SSE2 ld64 kernel for k = 1, 10, ..., 37 over every (m, n)
// with 1 <= m <= 1 and 1 <= n <= 4, with cm_stride set to 7 and
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22326
// Single run of the 1x4c2 SSE2 ld64 kernel at m = 1, n = 4, k = 8 with
// qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22340
// Single run of the 1x4c2 SSE2 ld64 kernel at m = 1, n = 4, k = 8 with
// qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22354
// Single run of the 1x4c2 SSE2 ld64 kernel at m = 1, n = 4, k = 8 with
// cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22368 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22369
22370
22371 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22384
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8 with
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22398
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8 with
// a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22412
// Runs the 2x4c2 SSE4.1 ld64 kernel at k = 8 over every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22430
// Runs the 2x4c2 SSE4.1 ld64 kernel at n = 4, k = 8 for m = 1..2, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22446
// Runs the 2x4c2 SSE4.1 ld64 kernel at m = 2, k = 8 for n = 1..4, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22462
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 1..7 with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22477
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 1..7 with m = 2, n = 4 and
// a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22493
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 1..7 over every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22513
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 9..15 with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22528
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 9..15 with m = 2, n = 4 and
// a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22544
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 9..15 over every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22564
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 16, 24, ..., 80 with m = 2
// and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22579
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 16, 24, ..., 80 with m = 2,
// n = 4 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22595
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 16, 24, ..., 80 over every
// (m, n) with 1 <= m <= 2 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22615
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22632
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 2 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22650
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22668
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 5..7, k = 1, 10, ..., 37 and
// m = 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22688
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 8 and 12 and
// k = 1, 10, ..., 37 with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22705
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 8 and 12 and
// k = 1, 10, ..., 37 with m = 2 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22723
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 8 and 12 and
// k = 1, 10, ..., 37 with m = 2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22741
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 8 and 12, k = 1, 10, ..., 37
// and m = 1..2, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22761
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 1, 10, ..., 37 over every
// (m, n) with 1 <= m <= 2 and 1 <= n <= 4, with cm_stride set to 7 and
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22782
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8 with
// qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22796
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8 with
// qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22810
// Single run of the 2x4c2 SSE4.1 ld64 kernel at m = 2, n = 4, k = 8 with
// cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22824 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22825
22826
22827 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4c2 SSE2 ld64 kernel at m = 3, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22840
// Single run of the 3x4c2 SSE2 ld64 kernel at m = 3, n = 4, k = 8 with
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22854
// Single run of the 3x4c2 SSE2 ld64 kernel at m = 3, n = 4, k = 8 with
// a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22868
// Runs the 3x4c2 SSE2 ld64 kernel at k = 8 over every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22886
// Runs the 3x4c2 SSE2 ld64 kernel at n = 4, k = 8 for m = 1..3, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22902
// Runs the 3x4c2 SSE2 ld64 kernel at m = 3, k = 8 for n = 1..4, with
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22918
// Runs the 3x4c2 SSE2 ld64 kernel for k = 1..7 with m = 3 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22933
// Runs the 3x4c2 SSE2 ld64 kernel for k = 1..7 with m = 3, n = 4 and
// a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22949
// Runs the 3x4c2 SSE2 ld64 kernel for k = 1..7 over every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22969
// Runs the 3x4c2 SSE2 ld64 kernel for k = 9..15 with m = 3 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22984
// Runs the 3x4c2 SSE2 ld64 kernel for k = 9..15 with m = 3, n = 4 and
// a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23000
// Runs the 3x4c2 SSE2 ld64 kernel for k = 9..15 over every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23020
// Runs the 3x4c2 SSE2 ld64 kernel for k = 16, 24, ..., 80 with m = 3
// and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23035
// Runs the 3x4c2 SSE2 ld64 kernel for k = 16, 24, ..., 80 with m = 3,
// n = 4 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23051
// Runs the 3x4c2 SSE2 ld64 kernel for k = 16, 24, ..., 80 over every
// (m, n) with 1 <= m <= 3 and 1 <= n <= 4, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// Runs the 3x4c2 SSE2 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23088
// Runs the 3x4c2 SSE2 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 3 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23106
// Runs the 3x4c2 SSE2 ld64 kernel for n = 5..7 and k = 1, 10, ..., 37
// with m = 3 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23124
// Runs the 3x4c2 SSE2 ld64 kernel for n = 5..7, k = 1, 10, ..., 37 and
// m = 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23144
// Runs the 3x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23161
// Runs the 3x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 3 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23179
// Runs the 3x4c2 SSE2 ld64 kernel for n = 8 and 12 and k = 1, 10, ..., 37
// with m = 3 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23197
// Runs the 3x4c2 SSE2 ld64 kernel for n = 8 and 12, k = 1, 10, ..., 37 and
// m = 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23217
// Runs the 3x4c2 SSE2 ld64 kernel for k = 1, 10, ..., 37 over every (m, n)
// with 1 <= m <= 3 and 1 <= n <= 4, with cm_stride set to 7 and
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23238
// 3x4c2 SSE2 ld64 kernel: single m = 3, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23252
// 3x4c2 SSE2 ld64 kernel: single m = 3, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23266
// 3x4c2 SSE2 ld64 kernel: single m = 3, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23280 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23281
23282
23283 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE4.1 ld64 kernel: single run with m = 3, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23296
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23310
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23324
// 3x4c2 SSE4.1 ld64 kernel: k = 8 with every n in [1, 4] and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23342
// 3x4c2 SSE4.1 ld64 kernel: n = 4, k = 8 with every m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23358
// 3x4c2 SSE4.1 ld64 kernel: m = 3, k = 8 with every n in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23374
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4 with every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23389
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4 with every k in [1, 7] and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23405
// 3x4c2 SSE4.1 ld64 kernel: every k in [1, 7], n in [1, 4] and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23425
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4 with every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23440
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4 with every k in [9, 15] and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23456
// 3x4c2 SSE4.1 ld64 kernel: every k in [9, 15], n in [1, 4] and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23476
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4 with k stepping by 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23491
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, k stepping by 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23507
// 3x4c2 SSE4.1 ld64 kernel: k stepping by 8 from 16 through 80, every n in [1, 4] and m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23527
// 3x4c2 SSE4.1 ld64 kernel: m = 3, every n in [5, 7], k in {1, 10, ..., 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23544
// 3x4c2 SSE4.1 ld64 kernel: m = 3, every n in [5, 7], k in {1, 10, ..., 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23562
// 3x4c2 SSE4.1 ld64 kernel: m = 3, every n in [5, 7], k in {1, 10, ..., 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23580
// 3x4c2 SSE4.1 ld64 kernel: every n in [5, 7], k in {1, 10, ..., 37}, m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23600
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n in {8, 12}, k in {1, 10, ..., 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23617
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n in {8, 12}, k in {1, 10, ..., 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23635
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n in {8, 12}, k in {1, 10, ..., 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23653
// 3x4c2 SSE4.1 ld64 kernel: n in {8, 12}, k in {1, 10, ..., 37}, every m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23673
// 3x4c2 SSE4.1 ld64 kernel: k in {1, 10, ..., 37}, every n in [1, 4] and m in [1, 3], cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23694
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23708
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23722
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23737
23738
23739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE4.1 ld64 kernel: single run with m = 4, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23752
// 4x4c2 SSE4.1 ld64 kernel: single m = 4, n = 4, k = 8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23766
// 4x4c2 SSE4.1 ld64 kernel: single m = 4, n = 4, k = 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23780
// 4x4c2 SSE4.1 ld64 kernel: k = 8 with every n in [1, 4] and m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23798
// 4x4c2 SSE4.1 ld64 kernel: n = 4, k = 8 with every m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23814
// 4x4c2 SSE4.1 ld64 kernel: m = 4, k = 8 with every n in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23830
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4 with every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23845
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4 with every k in [1, 7] and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23861
// 4x4c2 SSE4.1 ld64 kernel: every k in [1, 7], n in [1, 4] and m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23881
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4 with every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23896
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4 with every k in [9, 15] and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23912
// 4x4c2 SSE4.1 ld64 kernel: every k in [9, 15], n in [1, 4] and m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23932
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4 with k stepping by 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23947
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n = 4, k stepping by 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23963
// 4x4c2 SSE4.1 ld64 kernel: k stepping by 8 from 16 through 80, every n in [1, 4] and m in [1, 4].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23983
// 4x4c2 SSE4.1 ld64 kernel: m = 4, every n in [5, 7], k in {1, 10, ..., 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24000
// 4x4c2 SSE4.1 ld64 kernel: m = 4, every n in [5, 7], k in {1, 10, ..., 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24018
// 4x4c2 SSE4.1 ld64 kernel: m = 4, every n in [5, 7], k in {1, 10, ..., 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24036
// 4x4c2 SSE4.1 ld64 kernel: every n in [5, 7], k in {1, 10, ..., 37}, m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24056
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n in {8, 12}, k in {1, 10, ..., 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24073
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n in {8, 12}, k in {1, 10, ..., 37}, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24091
// 4x4c2 SSE4.1 ld64 kernel: m = 4, n in {8, 12}, k in {1, 10, ..., 37}, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24109
// 4x4c2 SSE4.1 ld64 kernel: n in {8, 12}, k in {1, 10, ..., 37}, every m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24129
// 4x4c2 SSE4.1 ld64 kernel: k in {1, 10, ..., 37}, every n in [1, 4] and m in [1, 4], cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24150
// 4x4c2 SSE4.1 ld64 kernel: single m = 4, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24164
// 4x4c2 SSE4.1 ld64 kernel: single m = 4, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24178
// 4x4c2 SSE4.1 ld64 kernel: single m = 4, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24192 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24193
24194
24195 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 AVX ld64 kernel: single run with m = 1, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24208
// 1x4c2 AVX ld64 kernel: single m = 1, n = 4, k = 8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24222
// 1x4c2 AVX ld64 kernel: single m = 1, n = 4, k = 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24236
// 1x4c2 AVX ld64 kernel: k = 8 with every n in [1, 4]; the m loop covers only m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24254
// 1x4c2 AVX ld64 kernel: n = 4, k = 8; the m loop covers only m = 1, one iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24270
// 1x4c2 AVX ld64 kernel: m = 1, k = 8 with every n in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24286
// m=1, n=4, with k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24301
// m=1, n=4, k swept over 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24317
// k in 1..7 crossed with n in 1..4 and m in 1..1; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24337
// m=1, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24352
// m=1, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24368
// k in 9..15 crossed with n in 1..4 and m in 1..1; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24388
// m=1, n=4, with k taking the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24403
// m=1, n=4, k over the multiples of 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24419
// k over the multiples of 8 from 16 through 80, crossed with n in 1..4 and
// m in 1..1; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24439
// m=1, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24456
// m=1, n in 5..7, k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24474
// m=1, n in 5..7, k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24492
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..1;
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24512
// m=1, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24529
// m=1, n in {8, 12}, k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24547
// m=1, n in {8, 12}, k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24565
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..1;
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24585
// k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..1, with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24606
// m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24620
// m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24634
// m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24648 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24649
24650
24651 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=2, n=4, k=8 (m and n equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24664
// m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24678
// m=2, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24692
// k=8 for every (n, m) with n in 1..4 and m in 1..2; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24710
// k=8, n=4, with m swept over 1..2; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24726
// k=8, m=2, with n swept over 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24742
// m=2, n=4, with k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24757
// m=2, n=4, k swept over 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24773
// k in 1..7 crossed with n in 1..4 and m in 1..2; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24793
// m=2, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24808
// m=2, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24824
// k in 9..15 crossed with n in 1..4 and m in 1..2; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24844
// m=2, n=4, with k taking the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24859
// m=2, n=4, k over the multiples of 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24875
// k over the multiples of 8 from 16 through 80, crossed with n in 1..4 and
// m in 1..2; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24895
// m=2, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24912
// m=2, n in 5..7, k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24930
// m=2, n in 5..7, k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24948
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..2;
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24968
// m=2, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24985
// m=2, n in {8, 12}, k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25003
// m=2, n in {8, 12}, k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25021
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..2;
// iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25041
// k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..2, with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25062
// m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25076
// m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25090
// m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25105
25106
25107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=4, k=8 (m and n equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25120
// m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25134
// m=3, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25148
// k=8 for every (n, m) with n in 1..4 and m in 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25166
// k=8, n=4, with m swept over 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25182
// k=8, m=3, with n swept over 1..4; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25198
// m=3, n=4, with k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25213
// m=3, n=4, k swept over 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25229
// k in 1..7 crossed with n in 1..4 and m in 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25249
// m=3, n=4, with k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25264
// m=3, n=4, k swept over 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25280
// k in 9..15 crossed with n in 1..4 and m in 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25300
// m=3, n=4, with k taking the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25315
// m=3, n=4, k over the multiples of 8 from 16 through 80, a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25331
// k over the multiples of 8 from 16 through 80, crossed with n in 1..4 and
// m in 1..3; iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25351
// n = 5, 6, 7 (above nr=4) with m=3 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25368
// n = 5, 6, 7 with m=3, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25386
// n = 5, 6, 7 with m=3, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25404
// n = 5, 6, 7 crossed with k = 1, 10, ..., 37 and every m in [1, 3]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25424
// n = 8 and 12 (multiples of nr=4) with m=3 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25441
// n = 8 and 12 with m=3, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25459
// n = 8 and 12 with m=3, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25477
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and every m in [1, 3]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25497
// cm_stride of 7 over every m in [1, 3], n in [1, 4] and k = 1, 10, ..., 37; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25518
// Full 3x4 tile at k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25532
// Full 3x4 tile at k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25546
// Full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25560 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25561
25562
25563 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 4x4 tile (m=4, n=4) with k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25576
// Full 4x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25590
// Full 4x4 tile at k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25604
// Every m in [1, 4] and n in [1, 4] at k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25622
// m swept over [1, 4] with n fixed at 4 and k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25638
// n swept over [1, 4] with m fixed at 4 and k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25654
// Full 4x4 tile with k = 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25669
// Full 4x4 tile with k = 1 through 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25685
// Every m in [1, 4] and n in [1, 4] for each k = 1 through 7; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25705
// Full 4x4 tile with k = 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25720
// Full 4x4 tile with k = 9 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25736
// Every m in [1, 4] and n in [1, 4] for each k = 9 through 15; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25756
// Full 4x4 tile with k swept over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25771
// Full 4x4 tile, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25787
// Every m in [1, 4] and n in [1, 4] for each k = 16, 24, ..., 80; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25807
// n = 5, 6, 7 (above nr=4) with m=4 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25824
// n = 5, 6, 7 with m=4, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25842
// n = 5, 6, 7 with m=4, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25860
// n = 5, 6, 7 crossed with k = 1, 10, ..., 37 and every m in [1, 4]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25880
// n = 8 and 12 (multiples of nr=4) with m=4 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25897
// n = 8 and 12 with m=4, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25915
// n = 8 and 12 with m=4, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25933
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and every m in [1, 4]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25953
// cm_stride of 7 over every m in [1, 4], n in [1, 4] and k = 1, 10, ..., 37; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25974
// Full 4x4 tile at k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25988
// Full 4x4 tile at k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26002
// Full 4x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26016 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26017
26018
26019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 2x4 tile (m=2, n=4) with k=8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
26032
// Full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
26046
// Full 2x4 tile at k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
26060
// Every m in [1, 2] and n in [1, 4] at k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26078
// m swept over [1, 2] with n fixed at 4 and k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26094
// n swept over [1, 4] with m fixed at 2 and k=8; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26110
// Full 2x4 tile with k = 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26125
// Full 2x4 tile with k = 1 through 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26141
// Every m in [1, 2] and n in [1, 4] for each k = 1 through 7; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26161
// Full 2x4 tile with k = 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26176
// Full 2x4 tile with k = 9 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26192
// Every m in [1, 2] and n in [1, 4] for each k = 9 through 15; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26212
// Full 2x4 tile with k swept over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26227
// Full 2x4 tile, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26243
// Every m in [1, 2] and n in [1, 4] for each k = 16, 24, ..., 80; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26263
// n = 5, 6, 7 (above nr=4) with m=2 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26280
// n = 5, 6, 7 with m=2, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26298
// n = 5, 6, 7 with m=2, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26316
// n = 5, 6, 7 crossed with k = 1, 10, ..., 37 and every m in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26336
// n = 8 and 12 (multiples of nr=4) with m=2 and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26353
// n = 8 and 12 with m=2, k = 1, 10, ..., 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26371
// n = 8 and 12 with m=2, k = 1, 10, ..., 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26389
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and every m in [1, 2]; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26409
// cm_stride of 7 over every m in [1, 2], n in [1, 4] and k = 1, 10, ..., 37; one iteration per case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26430
// Single run at m=2, n=4, k=8 with qmin set to 128; tester uses mr=2, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26444
// Single run at m=2, n=4, k=8 with qmax set to 128; tester uses mr=2, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26458
// Single run at m=2, n=4, k=8 with cm_stride=7; tester uses mr=2, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26472 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26473
26474
26475 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=3, n=4, k=8 with no stride overrides; tester uses mr=3,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26488
// Single run at m=3, n=4, k=8 with cn_stride=7; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26502
// Single run at m=3, n=4, k=8 with a_stride=11; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26516
// Sweeps n over 1..4 (outer) and m over 1..3 (inner) at k=8, one iteration
// per combination; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26534
// Sweeps m over 1..3 at n=4, k=8, one iteration per value; tester uses mr=3,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26550
// Sweeps n over 1..4 at m=3, k=8, one iteration per value; tester uses mr=3,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26566
// Sweeps k over 1..7 at m=3, n=4; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26581
// Sweeps k over 1..7 at m=3, n=4 with a_stride=11; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26597
// Sweeps k over 1..7 (outer), n over 1..4 and m over 1..3 (innermost), one
// iteration per combination; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26617
// Sweeps k over 9..15 at m=3, n=4; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26632
// Sweeps k over 9..15 at m=3, n=4 with a_stride=19; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26648
// Sweeps k over 9..15 (outer), n over 1..4 and m over 1..3 (innermost), one
// iteration per combination; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26668
// Sweeps k over 16, 24, ..., 80 at m=3, n=4; tester uses mr=3, nr=4, kr=2,
// sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26683
// Sweeps k over 16, 24, ..., 80 at m=3, n=4 with a_stride=83; tester uses
// mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26699
// Sweeps k over 16, 24, ..., 80 (outer), n over 1..4 and m over 1..3
// (innermost), one iteration each; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26719
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=3;
// tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26736
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=3 with
// cn_stride=7; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26754
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=3 with
// a_stride=43; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26772
// Sweeps n over 5..7 (outer), k over 1, 10, ..., 37 and m over 1..3
// (innermost), one iteration each; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26792
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=3;
// tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26809
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=3
// with cn_stride=7; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26827
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=3
// with a_stride=43; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26845
// Sweeps n over {8, 12} (outer), k over 1, 10, ..., 37 and m over 1..3
// (innermost), one iteration each; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26865
// Sweeps k over 1, 10, ..., 37 (outer), n over 1..4 and m over 1..3 with
// cm_stride=7, one iteration each; tester uses mr=3, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26886
// Single run at m=3, n=4, k=8 with qmin set to 128; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26900
// Single run at m=3, n=4, k=8 with qmax set to 128; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26914
// Single run at m=3, n=4, k=8 with cm_stride=7; tester uses mr=3, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26928 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26929
26930
26931 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=4, n=4, k=8 with no stride overrides; tester uses mr=4,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26944
// Single run at m=4, n=4, k=8 with cn_stride=7; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26958
// Single run at m=4, n=4, k=8 with a_stride=11; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
26972
// Sweeps n over 1..4 (outer) and m over 1..4 (inner) at k=8, one iteration
// per combination; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26990
// Sweeps m over 1..4 at n=4, k=8, one iteration per value; tester uses mr=4,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27006
// Sweeps n over 1..4 at m=4, k=8, one iteration per value; tester uses mr=4,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27022
// Sweeps k over 1..7 at m=4, n=4; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27037
// Sweeps k over 1..7 at m=4, n=4 with a_stride=11; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27053
// Sweeps k over 1..7 (outer), n over 1..4 and m over 1..4 (innermost), one
// iteration per combination; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27073
// Sweeps k over 9..15 at m=4, n=4; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27088
// Sweeps k over 9..15 at m=4, n=4 with a_stride=19; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27104
// Sweeps k over 9..15 (outer), n over 1..4 and m over 1..4 (innermost), one
// iteration per combination; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27124
// Sweeps k over 16, 24, ..., 80 at m=4, n=4; tester uses mr=4, nr=4, kr=2,
// sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27139
// Sweeps k over 16, 24, ..., 80 at m=4, n=4 with a_stride=83; tester uses
// mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27155
// Sweeps k over 16, 24, ..., 80 (outer), n over 1..4 and m over 1..4
// (innermost), one iteration each; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27175
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=4;
// tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27192
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=4 with
// cn_stride=7; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27210
// Sweeps n over 5..7 (outer) and k over 1, 10, ..., 37 (inner) at m=4 with
// a_stride=43; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27228
// Sweeps n over 5..7 (outer), k over 1, 10, ..., 37 and m over 1..4
// (innermost), one iteration each; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27248
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=4;
// tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27265
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=4
// with cn_stride=7; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27283
// Sweeps n over {8, 12} (outer) and k over 1, 10, ..., 37 (inner) at m=4
// with a_stride=43; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27301
// Sweeps n over {8, 12} (outer), k over 1, 10, ..., 37 and m over 1..4
// (innermost), one iteration each; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27321
// Sweeps k over 1, 10, ..., 37 (outer), n over 1..4 and m over 1..4 with
// cm_stride=7, one iteration each; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27342
// Single run at m=4, n=4, k=8 with qmin set to 128; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27356
// Single run at m=4, n=4, k=8 with qmax set to 128; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27370
// Single run at m=4, n=4, k=8 with cm_stride=7; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27384 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27385
27386
27387 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=4, n=4, k=8 with no stride overrides; tester uses mr=4,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27400
// Single run at m=4, n=4, k=8 with cn_stride=7; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27414
// Single run at m=4, n=4, k=8 with a_stride=11; tester uses mr=4, nr=4,
// kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27428
// Sweeps n over 1..4 (outer) and m over 1..4 (inner) at k=8, one iteration
// per combination; tester uses mr=4, nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27446
// Sweeps m over 1..4 at n=4, k=8, one iteration per value; tester uses mr=4,
// nr=4, kr=2, sr=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27462
// m=4 and k=8 fixed; n swept over [1, 4], one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27478
// m=4, n=4 fixed; k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27493
// m=4, n=4 fixed; k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27509
// For each k in [1, 7], visits every (n, m) pair with n in [1, 4] and
// m in [1, 4]; one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27529
// m=4, n=4 fixed; k swept over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27544
// m=4, n=4 fixed; k swept over [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27560
// For each k in [9, 15], visits every (n, m) pair with n in [1, 4] and
// m in [1, 4]; one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27580
// m=4, n=4 fixed; k takes the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27595
// m=4, n=4 fixed; k takes the multiples of 8 from 16 through 80, with
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27611
// For each k in {16, 24, ..., 80}, visits every (n, m) pair with n in [1, 4]
// and m in [1, 4]; one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27631
// m=4 fixed; n swept over [5, 7] and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27648
// m=4 fixed; n swept over [5, 7] and k over {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27666
// m=4 fixed; n swept over [5, 7] and k over {1, 10, 19, 28, 37}, with
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27684
// n swept over [5, 7], k over {1, 10, 19, 28, 37}, and m over [1, 4]
// (innermost); one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27704
// m=4 fixed; n takes the values 8 and 12 and, for each n, k runs over
// {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27721
// m=4 fixed; n takes the values 8 and 12 and k runs over {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27739
// m=4 fixed; n takes the values 8 and 12 and k runs over {1, 10, 19, 28, 37},
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27757
// n takes the values 8 and 12, k runs over {1, 10, 19, 28, 37}, and m over
// [1, 4] (innermost); one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27777
// k runs over {1, 10, 19, 28, 37}, n over [1, 4], and m over [1, 4]
// (innermost), with cm_stride set to 7; one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27798
// Single run at m=4, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27812
// Single run at m=4, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27826
// Single run at m=4, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27840 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27841
27842
27843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27856
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27870
// Single run at m=1, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27884
// k fixed at 8; n swept over [1, 4]. The inner m loop has bounds [1, 1], so it
// executes once per n. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27902
// n=4 and k=8 fixed; the m loop has bounds [1, 1], so the tester runs once
// with m=1 and a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27918
// m=1 and k=8 fixed; n swept over [1, 4], one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27934
// m=1, n=4 fixed; k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27949
// m=1, n=4 fixed; k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27965
// For each k in [1, 7], n is swept over [1, 4]; the innermost m loop has
// bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27985
// m=1, n=4 fixed; k swept over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28000
// m=1, n=4 fixed; k swept over [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28016
// For each k in [9, 15], n is swept over [1, 4]; the innermost m loop has
// bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28036
// m=1, n=4 fixed; k takes the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28051
// m=1, n=4 fixed; k takes the multiples of 8 from 16 through 80, with
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28067
// For each k in {16, 24, ..., 80}, n is swept over [1, 4]; the innermost m
// loop has bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28087
// m=1 fixed; n swept over [5, 7] and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28104
// m=1 fixed; n swept over [5, 7] and k over {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28122
// m=1 fixed; n swept over [5, 7] and k over {1, 10, 19, 28, 37}, with
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28140
// n swept over [5, 7] and k over {1, 10, 19, 28, 37}; the innermost m loop
// has bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28160
// m=1 fixed; n takes the values 8 and 12 and, for each n, k runs over
// {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28177
// m=1 fixed; n takes the values 8 and 12 and k runs over {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28195
// m=1 fixed; n takes the values 8 and 12 and k runs over {1, 10, 19, 28, 37},
// with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28213
// n takes the values 8 and 12 and k runs over {1, 10, 19, 28, 37}; the
// innermost m loop has bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28233
// k runs over {1, 10, 19, 28, 37} and n over [1, 4]; the innermost m loop has
// bounds [1, 1]. cm_stride is set to 7, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28268
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28282
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28297
28298
28299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28312
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28326
// Single run at m=1, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28340
// k fixed at 8; n swept over [1, 4]. The inner m loop has bounds [1, 1], so it
// executes once per n. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28358
// n=4 and k=8 fixed; the m loop has bounds [1, 1], so the tester runs once
// with m=1 and a single iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28374
// m=1 and k=8 fixed; n swept over [1, 4], one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28390
// m=1, n=4 fixed; k swept over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28405
// m=1, n=4 fixed; k swept over [1, 7] with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28421
// For each k in [1, 7], n is swept over [1, 4]; the innermost m loop has
// bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28441
// m=1, n=4 fixed; k swept over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28456
// m=1, n=4 fixed; k swept over [9, 15] with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28472
// For each k in [9, 15], n is swept over [1, 4]; the innermost m loop has
// bounds [1, 1]. One tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28492
// m=1, n=4 fixed; k takes the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28507
// m=1, n=4 fixed; k takes the multiples of 8 from 16 through 80, with
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28523
// 1x4c2 XOP ld128: k = 16, 24, ..., 80, every n in 1..4 and m in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28543
// 1x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28560
// 1x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28578
// 1x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=1 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28596
// 1x4c2 XOP ld128: n = 5..7, k = 1, 10, ..., 37, m in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28616
// 1x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28633
// 1x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=1 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28651
// 1x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=1 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28669
// 1x4c2 XOP ld128: n = 8, 12, k = 1, 10, ..., 37, m in 1..1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28689
// 1x4c2 XOP ld128: k = 1, 10, ..., 37, n in 1..4, m in 1..1 with cm_stride(7), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28710
// 1x4c2 XOP ld128: single run at m=1, n=4, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28724
// 1x4c2 XOP ld128: single run at m=1, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28738
// 1x4c2 XOP ld128: single run at m=1, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28752 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28753
28754
28755 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28768
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28782
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28796
// 3x4c2 XOP ld128: k=8, every n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28814
// 3x4c2 XOP ld128: k=8, n=4, m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28830
// 3x4c2 XOP ld128: k=8, m=3, n in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28846
// 3x4c2 XOP ld128: k = 1..7 at m=3, n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28861
// 3x4c2 XOP ld128: k = 1..7 at m=3, n=4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28877
// 3x4c2 XOP ld128: k = 1..7, every n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28897
// 3x4c2 XOP ld128: k = 9..15 at m=3, n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28912
// 3x4c2 XOP ld128: k = 9..15 at m=3, n=4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28928
// 3x4c2 XOP ld128: k = 9..15, every n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28948
// 3x4c2 XOP ld128: k = 16, 24, ..., 80 at m=3, n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28963
// 3x4c2 XOP ld128: k = 16, 24, ..., 80 at m=3, n=4 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28979
// 3x4c2 XOP ld128: k = 16, 24, ..., 80, every n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28999
// 3x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29016
// 3x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=3 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29034
// 3x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=3 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29052
// 3x4c2 XOP ld128: n = 5..7, k = 1, 10, ..., 37, m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29072
// 3x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29089
// 3x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=3 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29107
// 3x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=3 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29125
// 3x4c2 XOP ld128: n = 8, 12, k = 1, 10, ..., 37, m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29145
// 3x4c2 XOP ld128: k = 1, 10, ..., 37, n in 1..4, m in 1..3 with cm_stride(7), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29166
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29180
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29194
// 3x4c2 XOP ld128: single run at m=3, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29208 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29209
29210
29211 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 XOP ld128: single run at m=4, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29224
// 4x4c2 XOP ld128: single run at m=4, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29238
// 4x4c2 XOP ld128: single run at m=4, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29252
// 4x4c2 XOP ld128: k=8, every n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29270
// 4x4c2 XOP ld128: k=8, n=4, m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29286
// 4x4c2 XOP ld128: k=8, m=4, n in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29302
// 4x4c2 XOP ld128: k = 1..7 at m=4, n=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29317
// 4x4c2 XOP ld128: k = 1..7 at m=4, n=4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29333
// 4x4c2 XOP ld128: k = 1..7, every n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 7; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29353
// 4x4c2 XOP ld128: k = 9..15 at m=4, n=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29368
// 4x4c2 XOP ld128: k = 9..15 at m=4, n=4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29384
// 4x4c2 XOP ld128: k = 9..15, every n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk <= 15; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29404
// 4x4c2 XOP ld128: k = 16, 24, ..., 80 at m=4, n=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29419
// 4x4c2 XOP ld128: k = 16, 24, ..., 80 at m=4, n=4 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kk)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29435
// 4x4c2 XOP ld128: k = 16, 24, ..., 80, every n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29455
// 4x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29472
// 4x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=4 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29490
// 4x4c2 XOP ld128: n = 5..7 crossed with k = 1, 10, ..., 37 at m=4 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29508
// 4x4c2 XOP ld128: n = 5..7, k = 1, 10, ..., 37, m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29528
// 4x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29545
// 4x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=4 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29563
// 4x4c2 XOP ld128: n = 8, 12 crossed with k = 1, 10, ..., 37 at m=4 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(kk)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29581
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4 for the 4x4c2__xop_ld128
// microkernel (mr=4, nr=4, kr=2, sr=1), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29601
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with cm_stride set to 7
// for the 4x4c2__xop_ld128 microkernel (mr=4, nr=4, kr=2, sr=1); iterations is 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29622
// Single run at m=4, n=4, k=8 with qmin set to 128 for the 4x4c2__xop_ld128
// microkernel (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29636
// Single run at m=4, n=4, k=8 with qmax set to 128 for the 4x4c2__xop_ld128
// microkernel (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29650
// Single run at m=4, n=4, k=8 with cm_stride set to 7 for the 4x4c2__xop_ld128
// microkernel (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29664 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29665
29666
29667 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29680
// Single run at m=1, n=4, k=8 with cn_stride set to 7 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29694
// Single run at m=1, n=4, k=8 with a_stride set to 11 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29708
// Sweeps n = 1..4 and m = 1 at k = 8 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29726
// Loops m over 1..1 at n = 4, k = 8 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29742
// Sweeps n = 1..4 at m = 1, k = 8 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29758
// Sweeps k = 1..7 at m = 1, n = 4 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29773
// Sweeps k = 1..7 at m = 1, n = 4 with a_stride set to 11 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29789
// Sweeps k = 1..7, n = 1..4 and m = 1 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29809
// Sweeps k = 9..15 at m = 1, n = 4 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29824
// Sweeps k = 9..15 at m = 1, n = 4 with a_stride set to 19 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29840
// Sweeps k = 9..15, n = 1..4 and m = 1 for the 1x4c2s4__sse41_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29860
// Sweeps k = 16, 24, ..., 80 at m = 1, n = 4 for the 1x4c2s4__sse41_ld64
// microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29875
// Sweeps k = 16, 24, ..., 80 at m = 1, n = 4 with a_stride set to 83 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29891
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1 for the 1x4c2s4__sse41_ld64
// microkernel (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29911
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 1 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29928
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 1 with cn_stride set to 7
// for the 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29946
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 1 with a_stride set to 43
// for the 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29964
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1 for the 1x4c2s4__sse41_ld64
// microkernel (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29984
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 1 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30001
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 1 with cn_stride set to 7
// for the 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30019
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 1 with a_stride set to 43
// for the 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30037
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1 for the 1x4c2s4__sse41_ld64
// microkernel (mr=1, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30057
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1 with cm_stride set to 7 for
// the 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4); iterations is 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30078
// Single run at m=1, n=4, k=8 with qmin set to 128 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30092
// Single run at m=1, n=4, k=8 with qmax set to 128 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30106
// Single run at m=1, n=4, k=8 with cm_stride set to 7 for the
// 1x4c2s4__sse41_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30120 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30121
30122
30123 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=3, n=4, k=8 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30136
// Single run at m=3, n=4, k=8 with cn_stride set to 7 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30150
// Single run at m=3, n=4, k=8 with a_stride set to 11 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30164
// Sweeps n = 1..4 and m = 1..3 at k = 8 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30182
// Sweeps m = 1..3 at n = 4, k = 8 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30198
// Sweeps n = 1..4 at m = 3, k = 8 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4), with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30214
// Sweeps k = 1..7 at m = 3, n = 4 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30229
// Sweeps k = 1..7 at m = 3, n = 4 with a_stride set to 11 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30245
// Sweeps k = 1..7, n = 1..4 and m = 1..3 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30265
// Sweeps k = 9..15 at m = 3, n = 4 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30280
// Sweeps k = 9..15 at m = 3, n = 4 with a_stride set to 19 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30296
// Sweeps k = 9..15, n = 1..4 and m = 1..3 for the 3x4c2s4__sse41_ld64 microkernel
// (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30316
// Sweeps k = 16, 24, ..., 80 at m = 3, n = 4 for the 3x4c2s4__sse41_ld64
// microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30331
// Sweeps k = 16, 24, ..., 80 at m = 3, n = 4 with a_stride set to 83 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30347
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..3 for the 3x4c2s4__sse41_ld64
// microkernel (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30367
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 3 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30384
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 3 with cn_stride set to 7
// for the 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30402
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 at m = 3 with a_stride set to 43
// for the 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30420
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..3 for the 3x4c2s4__sse41_ld64
// microkernel (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30440
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 3 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30457
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 3 with cn_stride set to 7
// for the 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30475
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 3 with a_stride set to 43
// for the 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30493
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..3 for the 3x4c2s4__sse41_ld64
// microkernel (mr=3, nr=4, kr=2, sr=4), with iterations set to 1 per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30513
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3 with cm_stride set to 7 for
// the 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4); iterations is 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30534
// Single run at m=3, n=4, k=8 with qmin set to 128 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30548
// Single run at m=3, n=4, k=8 with qmax set to 128 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30562
// Single run at m=3, n=4, k=8 with cm_stride set to 7 for the
// 3x4c2s4__sse41_ld64 microkernel (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30576 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30577
30578
30579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 for the 1x4c2s4__avx_ld64 microkernel
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30592
// Single run at m=1, n=4, k=8 with cn_stride set to 7 for the
// 1x4c2s4__avx_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30606
// Single run at m=1, n=4, k=8 with a_stride set to 11 for the
// 1x4c2s4__avx_ld64 microkernel (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30620
// Sweeps n over 1..4 (outer) and m over 1..1 (inner) at k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30638
// Sweeps m over 1..1 with n=4 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30654
// Sweeps n over 1..4 with m=1 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30670
// Sweeps k over 1..7 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30685
// Sweeps k over 1..7 with m=1, n=4 and a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30701
// Sweeps k over 1..7, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30721
// Sweeps k over 9..15 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30736
// Sweeps k over 9..15 with m=1, n=4 and a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30752
// Sweeps k over 9..15, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30772
// Sweeps k over 16, 24, ..., 80 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30787
// Sweeps k over 16, 24, ..., 80 with m=1, n=4 and a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
30803
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30823
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30840
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30858
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1 and a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30876
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30896
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30913
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30931
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1 and a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
30949
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30969
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1 with cm_stride overridden
// to 7, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
30990
// Single run with m=1, n=4, k=8 and qmin overridden to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31004
// Single run with m=1, n=4, k=8 and qmax overridden to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31018
// Single run with m=1, n=4, k=8 and cm_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31032 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31033
31034
31035 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=4, k=8 and no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31048
// Single run with m=1, n=4, k=8 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31062
// Single run with m=1, n=4, k=8 and a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31076
// Sweeps n over 1..4 (outer) and m over 1..1 (inner) at k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31094
// Sweeps m over 1..1 with n=4 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31110
// Sweeps n over 1..4 with m=1 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31126
// Sweeps k over 1..7 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31141
// Sweeps k over 1..7 with m=1, n=4 and a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31157
// Sweeps k over 1..7, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31177
// Sweeps k over 9..15 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31192
// Sweeps k over 9..15 with m=1, n=4 and a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31208
// Sweeps k over 9..15, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31228
// Sweeps k over 16, 24, ..., 80 with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31243
// Sweeps k over 16, 24, ..., 80 with m=1, n=4 and a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31259
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31279
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31296
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31314
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=1 and a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31332
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31352
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31369
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31387
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m=1 and a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31405
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31425
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1 with cm_stride overridden
// to 7, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31446
// Single run with m=1, n=4, k=8 and qmin overridden to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31460
// Single run with m=1, n=4, k=8 and qmax overridden to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31474
// Single run with m=1, n=4, k=8 and cm_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31489
31490
31491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=4, k=8 and no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31504
// Single run with m=3, n=4, k=8 and cn_stride overridden to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31518
// Single run with m=3, n=4, k=8 and a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
31532
// Sweeps n over 1..4 (outer) and m over 1..3 (inner) at k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
31550
// Sweeps m over 1..3 with n=4 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31566
// Sweeps n over 1..4 with m=3 and k=8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31582
// Sweeps k over 1..7 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31597
// Sweeps k over 1..7 with m=3, n=4 and a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31613
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
31633
// Sweeps k over 9..15 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31648
// Sweeps k over 9..15 with m=3, n=4 and a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
31664
// Every (m, n) sub-tile of the 3x4 tile for every k in [9, 16), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31684
// Full 3x4 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31699
// Full 3x4 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31715
// Every (m, n) sub-tile of the 3x4 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31735
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31752
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31770
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31788
// n in [5, 8), k = 1, 10, ..., 37, and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31808
// m = 3 with n = 8, 12 and k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31825
// m = 3 with n = 8, 12 and k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31843
// m = 3 with n = 8, 12 and k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31861
// n = 8, 12, k = 1, 10, ..., 37, and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31881
// Every (m, n) sub-tile of the 3x4 tile for k = 1, 10, ..., 37, with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31902
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31916
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31930
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31944 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31945
31946
31947 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31960
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31974
// Full 4x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31988
// Every (m, n) sub-tile of the 4x4 tile at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32006
// m in [1, 4] with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32022
// n in [1, 4] with m = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32038
// Full 4x4 tile for every k in [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32053
// Full 4x4 tile for every k in [1, 8), with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32069
// Every (m, n) sub-tile of the 4x4 tile for every k in [1, 8), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32089
// Full 4x4 tile for every k in [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32104
// Full 4x4 tile for every k in [9, 16), with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32120
// Every (m, n) sub-tile of the 4x4 tile for every k in [9, 16), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32140
// Full 4x4 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32155
// Full 4x4 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32171
// Every (m, n) sub-tile of the 4x4 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32191
// m = 4 with n in [5, 8) and k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32208
// m = 4 with n in [5, 8) and k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32226
// m = 4 with n in [5, 8) and k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32244
// n in [5, 8), k = 1, 10, ..., 37, and m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32264
// m = 4 with n = 8, 12 and k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32281
// m = 4 with n = 8, 12 and k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32299
// m = 4 with n = 8, 12 and k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n).k(k);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32317
// n = 8, 12, k = 1, 10, ..., 37, and m in [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32337
// Every (m, n) sub-tile of the 4x4 tile for k = 1, 10, ..., 37, with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32358
// Full 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32372
// Full 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32386
// Full 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32400 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32401
32402
32403 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32416
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32430
// Full 3x4 tile at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32444
// Every (m, n) sub-tile of the 3x4 tile at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32462
// m in [1, 3] with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32478
// n in [1, 4] with m = 3 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32494
// Full 3x4 tile for every k in [1, 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32509
// Full 3x4 tile for every k in [1, 8), with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32525
// Every (m, n) sub-tile of the 3x4 tile for every k in [1, 8), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32545
// Full 3x4 tile for every k in [9, 16).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32560
// Full 3x4 tile for every k in [9, 16), with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32576
// Every (m, n) sub-tile of the 3x4 tile for every k in [9, 16), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32596
// Full 3x4 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32611
// Full 3x4 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32627
// Every (m, n) sub-tile of the 3x4 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32647
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32664
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32682
// m = 3 with n in [5, 8) and k = 1, 10, ..., 37, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n).k(k);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32700
// n in [5, 8), k = 1, 10, ..., 37, and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32720
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32737
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 3 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32755
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32773
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1..3, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32793
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1..3 with cm_stride = 7,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32814
// Single m = 3, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32828
// Single m = 3, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32842
// Single m = 3, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32856 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32857
32858
32859 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m = 1, n = 4, k = 8 run with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32872
// Single m = 1, n = 4, k = 8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32886
// Single m = 1, n = 4, k = 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32900
// Sweeps n = 1..4 and m = 1 at k = 8, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32918
// Sweeps m over 1..1 with n = 4 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32934
// Sweeps n = 1..4 with m = 1 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32950
// Sweeps k = 1..7 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32965
// Sweeps k = 1..7 with m = 1, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32981
// Sweeps k = 1..7, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33001
// Sweeps k = 9..15 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33016
// Sweeps k = 9..15 with m = 1, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33032
// Sweeps k = 9..15, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33052
// Sweeps k = 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33067
// Sweeps k = 16, 24, ..., 80 with m = 1, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33083
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33103
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33120
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33138
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33156
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33176
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33193
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33211
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33229
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33249
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with cm_stride = 7,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33270
// Single m = 1, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33284
// Single m = 1, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33298
// Single m = 1, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33312 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33313
33314
33315 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m = 1, n = 4, k = 8 run with no stride or clamp overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33328
// Single m = 1, n = 4, k = 8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33342
// Single m = 1, n = 4, k = 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33356
// Sweeps n = 1..4 and m = 1 at k = 8, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33374
// Sweeps m over 1..1 with n = 4 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33390
// Sweeps n = 1..4 with m = 1 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33406
// Sweeps k = 1..7 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33421
// Sweeps k = 1..7 with m = 1, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33437
// Sweeps k = 1..7, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33457
// Sweeps k = 9..15 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33472
// Sweeps k = 9..15 with m = 1, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33488
// Sweeps k = 9..15, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33508
// Sweeps k = 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33523
// Sweeps k = 16, 24, ..., 80 with m = 1, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_dim).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33539
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33559
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33576
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33594
// Sweeps n = 5..7 against k in {1, 10, 19, 28, 37} with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33612
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33632
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33649
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33667
// Sweeps n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_dim).k(k_dim).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33685
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33705
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with cm_stride = 7,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33726
// Single m = 1, n = 4, k = 8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33740
// Single m = 1, n = 4, k = 8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33754
// Single m = 1, n = 4, k = 8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33768 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33769
33770
33771 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33784
// Single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33798
// Single run at m = 2, n = 4, k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33812
// Runs every (n, m) pair with n in [1, 4] and m in [1, 2] at k = 8,
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33830
// Varies m over [1, 2] while holding n = 4 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33846
// Varies n over [1, 4] while holding m = 2 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33862
// Sweeps k over [1, 7] at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33877
// Sweeps k over [1, 7] at m = 2, n = 4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33893
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33913
// Sweeps k over [9, 15] at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33928
// Sweeps k over [9, 15] at m = 2, n = 4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33944
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33964
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33979
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4
// with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33995
// Sweeps k over the multiples of 8 from 16 through 80, n over [1, 4] and
// m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34032
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34050
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34068
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34088
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34105
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34123
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34141
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34161
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2] with
// cm_stride(7), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34182
// Single run at m = 2, n = 4, k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34196
// Single run at m = 2, n = 4, k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34210
// Single run at m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34224 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34225
34226
34227 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34240
// Single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34254
// Single run at m = 2, n = 4, k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34268
// Runs every (n, m) pair with n in [1, 4] and m in [1, 2] at k = 8,
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34286
// Varies m over [1, 2] while holding n = 4 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34302
// Varies n over [1, 4] while holding m = 2 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34318
// Sweeps k over [1, 7] at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34333
// Sweeps k over [1, 7] at m = 2, n = 4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34349
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34369
// Sweeps k over [9, 15] at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34384
// Sweeps k over [9, 15] at m = 2, n = 4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34400
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34420
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34435
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4
// with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34451
// Sweeps k over the multiples of 8 from 16 through 80, n over [1, 4] and
// m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34471
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34488
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34506
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} at m = 2
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34524
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34544
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34561
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34579
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34597
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2],
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34617
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2] with
// cm_stride(7), one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34638
// Single run at m = 2, n = 4, k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34652
// Single run at m = 2, n = 4, k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34666
// Single run at m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(2).nr(4).kr(2).sr(4).m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34680 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34681
34682
34683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 3, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(3).nr(4).kr(2).sr(4).m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34696
// Single run at m = 3, n = 4, k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(3).nr(4).kr(2).sr(4).m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34710
// Single run at m = 3, n = 4, k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester().mr(3).nr(4).kr(2).sr(4).m(3).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34724
// Runs every (n, m) pair with n in [1, 4] and m in [1, 3] at k = 8,
// one iteration per pair.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34742
// Varies m over [1, 3] while holding n = 4 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34758
// Varies n over [1, 4] while holding m = 3 and k = 8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34774
// Sweeps k over [1, 7] at m = 3, n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34789
// Sweeps k over [1, 7] at m = 3, n = 4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
34805
// Sweeps k in 1..7, n in 1..4 and m in 1..3; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34825
// Runs m = 3, n = 4 for every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
34840
// Runs m = 3, n = 4 for every k in 9..15 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
34856
// Sweeps k in 9..15, n in 1..4 and m in 1..3; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34876
// Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
34891
// Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step 8) with a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
34907
// Sweeps k = 16, 24, ..., 80, n in 1..4 and m in 1..3; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34927
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
34944
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
34962
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
34980
// Sweeps n in 5..7, k = 1, 10, ..., 37 and m in 1..3; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35000
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35017
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35035
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35053
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m in 1..3; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35073
// Sweeps k = 1, 10, ..., 37, n in 1..4 and m in 1..3 with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35094
// Single run at m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35108
// Single run at m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35122
// Single run at m = 3, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35136 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35137
35138
35139 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8 (m and n equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35152
// Single run at m = 4, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35166
// Single run at m = 4, n = 4, k = 8 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35180
// Sweeps n in 1..4 and m in 1..4 at k = 8; one iteration per (n, m) pair.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35198
// Varies m over 1..4 (mr = 4) while n = 4 and k = 8 stay fixed; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35214
// Varies n over 1..4 (nr = 4) while m = 4 and k = 8 stay fixed; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35230
// Runs m = 4, n = 4 for every k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35245
// Runs m = 4, n = 4 for every k in 1..7 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35261
// Sweeps k in 1..7, n in 1..4 and m in 1..4; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35281
// Runs m = 4, n = 4 for every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35296
// Runs m = 4, n = 4 for every k in 9..15 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35312
// Sweeps k in 9..15, n in 1..4 and m in 1..4; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35332
// Runs m = 4, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35347
// Runs m = 4, n = 4 for k = 16, 24, ..., 80 (step 8) with a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35363
// Sweeps k = 16, 24, ..., 80, n in 1..4 and m in 1..4; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35383
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35400
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 4 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35418
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 4 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35436
// Sweeps n in 5..7, k = 1, 10, ..., 37 and m in 1..4; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35456
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35473
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 4 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35491
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 (step 9) with m = 4 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35509
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m in 1..4; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35529
// Sweeps k = 1, 10, ..., 37, n in 1..4 and m in 1..4 with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35550
// Single run at m = 4, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35564
// Single run at m = 4, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35578
// Single run at m = 4, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35592 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35593
35594
35595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8 (m and n equal to mr and nr).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35608
// Single run at m = 4, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35622
// Single run at m = 4, n = 4, k = 8 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35636
// Sweeps n in 1..4 and m in 1..4 at k = 8; one iteration per (n, m) pair.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35654
// Varies m over 1..4 (mr = 4) while n = 4 and k = 8 stay fixed; one iteration per m.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35670
// Varies n over 1..4 (nr = 4) while m = 4 and k = 8 stay fixed; one iteration per n.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35686
// Runs m = 4, n = 4 for every k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35701
// Runs m = 4, n = 4 for every k in 1..7 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35717
// Sweeps k in 1..7, n in 1..4 and m in 1..4; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35737
// Runs m = 4, n = 4 for every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35752
// Runs m = 4, n = 4 for every k in 9..15 with a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35768
// Sweeps k in 9..15, n in 1..4 and m in 1..4; one iteration per (k, n, m) combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35788
// Runs m = 4, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35803
// Runs m = 4, n = 4 for k = 16, 24, ..., 80 (step 8) with a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35819
// Sweeps k = 16, 24, ..., 80, n in 1..4 and m in 1..4; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35839
// Sweeps n in 5..7 and k = 1, 10, ..., 37 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35856
// n = 5, 6, 7 with m = 4 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35874
// n = 5, 6, 7 with m = 4 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35892
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each combined with every m in 1..4;
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35912
// n = 8 and 12 (multiples of nr = 4) with m = 4; k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35929
// n = 8 and 12 with m = 4 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35947
// n = 8 and 12 with m = 4 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35965
// n = 8 and 12 and k = 1, 10, 19, 28, 37, each combined with every m in 1..4;
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35985
// k = 1, 10, 19, 28, 37 crossed with every n in 1..4 and m in 1..4, using
// cm_stride(7) and iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36006
// Single 4x4 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36020
// Single 4x4 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36034
// Single 4x4 tile at k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36048 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36049
36050
36051 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 tile at k = 8 (equal to kr).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36064
// Single 1x4 tile at k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36078
// Single 1x4 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36092
// k = 8 with every n in 1..4 and m in 1..1; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36110
// k = 8, n = 4, with m swept over 1..1; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36126
// k = 8, m = 1, with n swept over 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36142
// Full 1x4 tile with k swept over 1..7 (below kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36157
// Full 1x4 tile with k swept over 1..7 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36173
// k in 1..7 crossed with every n in 1..4 and m in 1..1; iterations(1) per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36193
// Full 1x4 tile with k swept over 9..15 (above kr = 8, below 16).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36208
// Full 1x4 tile with k swept over 9..15 and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36224
// k in 9..15 crossed with every n in 1..4 and m in 1..1; iterations(1) per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36244
// Full 1x4 tile; k takes each multiple of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36259
// Full 1x4 tile with k = 16, 24, ..., 80 and a_stride(83), which is larger
// than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
36275
// For each k in 16, 24, ..., 80: every n in 1..4 and m in 1..1, using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36295
// n = 5, 6, 7 (above nr = 4) with m = 1; k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36312
// n = 5, 6, 7 with m = 1 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36330
// n = 5, 6, 7 with m = 1 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36348
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each with m in 1..1; iterations(1)
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36368
// n = 8 and 12 (multiples of nr = 4) with m = 1; k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36385
// n = 8 and 12 with m = 1 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36403
// n = 8 and 12 with m = 1 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36421
// n = 8 and 12 and k = 1, 10, 19, 28, 37, each with m in 1..1; iterations(1)
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36441
// k = 1, 10, 19, 28, 37 crossed with every n in 1..4 and m in 1..1, using
// cm_stride(7) and iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36462
// Single 1x4 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36476
// Single 1x4 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36490
// Single 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
36504 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36505
36506
36507 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 tile at k = 8 (equal to kr).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36520
// Single 1x4 tile at k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36534
// Single 1x4 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
36548
// k = 8 with every n in 1..4 and m in 1..1; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36566
// k = 8, n = 4, with m swept over 1..1; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36582
// k = 8, m = 1, with n swept over 1..4; iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36598
// Full 1x4 tile with k swept over 1..7 (below kr = 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36613
// Full 1x4 tile with k swept over 1..7 and a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36629
// k in 1..7 crossed with every n in 1..4 and m in 1..1; iterations(1) per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36649
// Full 1x4 tile with k swept over 9..15 (above kr = 8, below 16).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36664
// Full 1x4 tile with k swept over 9..15 and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36680
// k in 9..15 crossed with every n in 1..4 and m in 1..1; iterations(1) per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36700
// Full 1x4 tile; k takes each multiple of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36715
// Full 1x4 tile with k = 16, 24, ..., 80 and a_stride(83), which is larger
// than every k in the sweep.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
36731
// For each k in 16, 24, ..., 80: every n in 1..4 and m in 1..1, using
// iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36751
// n = 5, 6, 7 (above nr = 4) with m = 1; k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36768
// n = 5, 6, 7 with m = 1 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36786
// n = 5, 6, 7 with m = 1 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36804
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each with m in 1..1; iterations(1)
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36824
// n = 8 and 12 (multiples of nr = 4) with m = 1; k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36841
// n = 8 and 12 with m = 1 and cn_stride(7); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36859
// n = 8 and 12 with m = 1 and a_stride(43); k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
36877
// n = 8 and 12 and k = 1, 10, 19, 28, 37, each with m in 1..1; iterations(1)
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36897
// k = 1, 10, 19, 28, 37 crossed with every n in 1..4 and m in 1..1, using
// cm_stride(7) and iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
36918
// m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36932
// m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36946
// m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36960 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36961
36962
36963 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single call with m=3, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36976
// m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36990
// m=3, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37004
// k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37022
// k=8, n=4; m runs over [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37038
// k=8, m=3; n runs over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37054
// m=3, n=4; k runs over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37069
// m=3, n=4, a_stride=11; k runs over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37085
// k in [1, 7] x n in [1, 4] x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37105
// m=3, n=4; k runs over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37120
// m=3, n=4, a_stride=19; k runs over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37136
// k in [9, 15] x n in [1, 4] x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37156
// m=3, n=4; k runs over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37171
// m=3, n=4, a_stride=83; k runs over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37187
// k in {16, 24, ..., 80} x n in [1, 4] x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37207
// m=3; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37224
// m=3, cn_stride=7; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37242
// m=3, a_stride=43; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37260
// n in [5, 7] x k in {1, 10, 19, 28, 37} x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37280
// m=3; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37297
// m=3, cn_stride=7; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37315
// m=3, a_stride=43; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37333
// n in {8, 12} x k in {1, 10, 19, 28, 37} x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37353
// cm_stride=7; k in {1, 10, 19, 28, 37} x n in [1, 4] x m in [1, 3], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37374
// m=3, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37388
// m=3, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37402
// m=3, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37416 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37417
37418
37419 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single call with m=1, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37432
// m=1, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37446
// m=1, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37460
// k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37478
// k=8, n=4; m runs over [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37494
// k=8, m=1; n runs over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37510
// m=1, n=4; k runs over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37525
// m=1, n=4, a_stride=11; k runs over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37541
// k in [1, 7] x n in [1, 4] x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37561
// m=1, n=4; k runs over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37576
// m=1, n=4, a_stride=19; k runs over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37592
// k in [9, 15] x n in [1, 4] x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37612
// m=1, n=4; k runs over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37627
// m=1, n=4, a_stride=83; k runs over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37643
// k in {16, 24, ..., 80} x n in [1, 4] x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37663
// m=1; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37680
// m=1, cn_stride=7; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37698
// m=1, a_stride=43; n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37716
// n in [5, 7] x k in {1, 10, 19, 28, 37} x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37736
// m=1; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37753
// m=1, cn_stride=7; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37771
// m=1, a_stride=43; n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37789
// n in {8, 12} x k in {1, 10, 19, 28, 37} x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37809
// cm_stride=7; k in {1, 10, 19, 28, 37} x n in [1, 4] x m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37830
// m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37844
// m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37858
// m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37872 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37873
37874
37875 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: a single call with m=2, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37888
// m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37902
// m=2, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
37916
// k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37934
// k=8, n=4; m runs over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37950
// k=8, m=2; n runs over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
37966
// Sweeps k over [1, 7] (all values below 8) at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
37981
// Sweeps k over [1, 7] at m=2, n=4 with a_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
37997
// For each k in [1, 7], sweeps n over [1, 4] and m over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38017
// Sweeps k over [9, 15] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38032
// Sweeps k over [9, 15] at m=2, n=4 with a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38048
// For each k in [9, 15], sweeps n over [1, 4] and m over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38068
// Sweeps k over the multiples of 8 from 16 to 80 at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38083
// Sweeps k over the multiples of 8 from 16 to 80 at m=2, n=4 with a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38099
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] and m over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38119
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 (step 9, up to 40) at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38136
// For each n in [5, 7], sweeps k over 1..40 in steps of 9 at m=2 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38154
// For each n in [5, 7], sweeps k over 1..40 in steps of 9 at m=2 with a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38172
// For each n in [5, 7] and k in 1..40 (step 9), sweeps m over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38192
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38209
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=2 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38227
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=2 with a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38245
// For n in {8, 12} and k in 1..40 (step 9), sweeps m over [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38265
// For k in 1..40 (step 9), sweeps n over [1, 4] and m over [1, 2] with cm_stride=7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38286
// Single run at m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38300
// Single run at m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38314
// Single run at m=2, n=4, k=8 with cm_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38328 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
38329
38330
38331 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 1x4c8 SSE2 ld128 kernel at m=1, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38344
// Single run at m=1, n=4, k=8 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38358
// Single run at m=1, n=4, k=8 with a_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38372
// Sweeps n over [1, 4] (the m loop covers only m=1) at k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38390
// Loops m over [1, 1] (a single value) with n=4 and k=8, one iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38406
// Sweeps n over [1, 4] with m=1 and k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38422
// Sweeps k over [1, 7] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38437
// Sweeps k over [1, 7] at m=1, n=4 with a_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38453
// For each k in [1, 7], sweeps n over [1, 4] (m loop covers only m=1), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38473
// Sweeps k over [9, 15] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38488
// Sweeps k over [9, 15] at m=1, n=4 with a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38504
// For each k in [9, 15], sweeps n over [1, 4] (m loop covers only m=1), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38524
// Sweeps k over the multiples of 8 from 16 to 80 at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38539
// Sweeps k over the multiples of 8 from 16 to 80 at m=1, n=4 with a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
38555
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] (m loop covers only m=1),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38575
// For each n in [5, 7], sweeps k over 1..40 in steps of 9 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38592
// For each n in [5, 7], sweeps k over 1..40 in steps of 9 at m=1 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38610
// For each n in [5, 7], sweeps k over 1..40 in steps of 9 at m=1 with a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38628
// For each n in [5, 7] and k in 1..40 (step 9), loops m over [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38648
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38665
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=1 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38683
// For n in {8, 12}, sweeps k over 1..40 in steps of 9 at m=1 with a_stride=43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38701
// For n in {8, 12} and k in 1..40 (step 9), loops m over [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38721
// For k in 1..40 (step 9), sweeps n over [1, 4] (m loop covers only m=1) with
// cm_stride=7, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38742
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38756
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38770
// Single run at m=1, n=4, k=8 with cm_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
38784 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
38785
38786
38787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 1x4c8 SSE4.1 ld128 kernel at m=1, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38800
// Single run at m=1, n=4, k=8 with cn_stride=7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38814
// Single run at m=1, n=4, k=8 with a_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
38828
// Sweeps n over [1, 4] (the m loop covers only m=1) at k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
38846
// Loops m over [1, 1] (a single value) with n=4 and k=8, one iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38862
// Sweeps n over [1, 4] with m=1 and k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38878
// Sweeps k over [1, 7] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38893
// Sweeps k over [1, 7] at m=1, n=4 with a_stride=11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38909
// For each k in [1, 7], sweeps n over [1, 4] (m loop covers only m=1), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38929
// Sweeps k over [9, 15] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38944
// Sweeps k over [9, 15] at m=1, n=4 with a_stride=19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38960
// For each k in [9, 15], sweeps n over [1, 4] (m loop covers only m=1), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
38980
// Sweeps k over the multiples of 8 from 16 to 80 at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
38995
// Sweeps k over the multiples of 8 from 16 to 80 at m=1, n=4 with a_stride=83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
39011
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] (m loop covers only m=1),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39031
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39048
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39066
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39084
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39104
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39121
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39139
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39157
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39177
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1 with cm_stride
// set to 7, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39198
// Runs the m=1, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39212
// Runs the m=1, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39226
// Runs the m=1, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39240 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
39241
39242
39243 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the m=3, n=4, k=8 configuration with no extra strides or clamping.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39256
// Runs the m=3, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39270
// Runs the m=3, n=4, k=8 configuration with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39284
// Sweeps n over 1..4 and m over 1..3 at k=8, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 4; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39302
// Sweeps m over 1..3 at n=4, k=8, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size < 4; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39318
// Sweeps n over 1..4 at m=3, k=8, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39334
// Runs the m=3, n=4 configuration for every k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39349
// Runs the m=3, n=4 configuration for every k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39365
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39385
// Runs the m=3, n=4 configuration for every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39400
// Runs the m=3, n=4 configuration for every k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39416
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39436
// Runs the m=3, n=4 configuration for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39451
// Runs the m=3, n=4 configuration for k = 16, 24, ..., 80 with a_stride set
// to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
39467
// Sweeps k = 16, 24, ..., 80 against n in 1..4 and m in 1..3, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39487
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39504
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39522
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39540
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..3, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39560
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39577
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39595
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
39613
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..3, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39633
// Sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..3 with cm_stride
// set to 7, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39654
// Runs the m=3, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39668
// Runs the m=3, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39682
// Runs the m=3, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
39696 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
39697
39698
39699 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the m=1, n=4, k=8 configuration with no extra strides or clamping.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39712
// Runs the m=1, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39726
// Runs the m=1, n=4, k=8 configuration with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
39740
// Sweeps n over 1..4 and m over 1..1 at k=8, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 2; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39758
// Sweeps m over 1..1 at n=4, k=8, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size < 2; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39774
// Sweeps n over 1..4 at m=1, k=8, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39790
// Runs the m=1, n=4 configuration for every k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39805
// Runs the m=1, n=4 configuration for every k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39821
// Sweeps k over 1..7, n over 1..4 and m over 1..1, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39841
// Runs the m=1, n=4 configuration for every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39856
// Runs the m=1, n=4 configuration for every k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39872
// Sweeps k over 9..15, n over 1..4 and m over 1..1, one tester iteration per
// combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39892
// Runs the m=1, n=4 configuration for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39907
// Runs the m=1, n=4 configuration for k = 16, 24, ..., 80 with a_stride set
// to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
39923
// Sweeps k = 16, 24, ..., 80 against n in 1..4 and m in 1..1, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39943
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39960
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39978
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m fixed at 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
39996
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40016
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
40033
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
40051
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m fixed at 1 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
40069
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40089
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40110
// Single m=1, n=4, k=8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(1).n(4).k(8)           // m/n/k under test
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40124
// Single m=1, n=4, k=8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(1).n(4).k(8)           // m/n/k under test
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40138
// Single m=1, n=4, k=8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(1).n(4).k(8)           // m/n/k under test
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40152 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
40153
40154
40155 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=4 (equal to mr/nr) and k=8 (equal to kr).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40168
// Single m=3, n=4, k=8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40182
// Single m=3, n=4, k=8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40196
// With k fixed at 8, visits every (n, m) pair for n in 1..4 and m in 1..3,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(m_dim).n(n_dim).k(8)   // m/n/k under test
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40214
// With n=4 and k=8 fixed, sweeps m over 1..3 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(m_dim).n(4).k(8)       // m/n/k under test
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40230
// With m=3 and k=8 fixed, sweeps n over 1..4 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(n_dim).k(8)       // m/n/k under test
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40246
// Sweeps k over 1..7 (all values below kr=8) with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40261
// Sweeps k over 1..7 with m=3, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40277
// Sweeps k over 1..7, n over 1..4 and m over 1..3, with iterations set to 1
// for each combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40297
// Sweeps k over 9..15 (strictly between kr=8 and 16) with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40312
// Sweeps k over 9..15 with m=3, n=4 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40328
// Sweeps k over 9..15, n over 1..4 and m over 1..3, with iterations set to 1
// for each combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40348
// Sweeps k over the multiples of 8 from 16 through 80 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40363
// Sweeps k over the multiples of 8 from 16 through 80 with m=3, n=4 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(3).n(4).k(k_dim)       // m/n/k under test
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
40379
// Sweeps k over the multiples of 8 from 16 through 80, n over 1..4 and m over
// 1..3, with iterations set to 1 for each combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40399
// Sweeps n over 5..7 (strictly between nr=4 and 8) and k over
// {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40416
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m=3 and cn_stride
// set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40434
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m=3 and a_stride
// set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40452
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40472
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40489
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and cn_stride
// set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40507
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and a_stride
// set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(3).n(n_dim).k(k_dim)   // m/n/k under test
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40525
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40545
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40566
// Single m=3, n=4, k=8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40580
// Single m=3, n=4, k=8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40594
// Single m=3, n=4, k=8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(4).k(8)           // m/n/k under test
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
40608 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
40609
40610
40611 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with extended_weights enabled, m=2, n=8 (equal to mr/nr) and
// k=8 (equal to kr).
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .extended_weights(true)
      .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(2).n(8).k(8)           // m/n/k under test
      .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
40625
// Single extended-weights run with m=2, n=8, k=8 and cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .extended_weights(true)
      .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(2).n(8).k(8)           // m/n/k under test
      .cn_stride(11)
      .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
40640
// Single extended-weights run with m=2, n=8, k=8 and a_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .extended_weights(true)
      .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(2).n(8).k(8)           // m/n/k under test
      .a_stride(11)
      .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
40655
// With k fixed at 8 and extended_weights enabled, visits every (n, m) pair
// for n in 1..8 and m in 1..2, with iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(m_dim).n(n_dim).k(8)   // m/n/k under test
          .iterations(1)
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40674
// With n=8, k=8 and extended_weights enabled, sweeps m over 1..2 with
// iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(m_dim).n(8).k(8)       // m/n/k under test
        .iterations(1)
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40691
// With m=2, k=8 and extended_weights enabled, sweeps n over 1..8 with
// iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(n_dim).k(8)       // m/n/k under test
        .iterations(1)
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40708
// Sweeps k over 1..7 (all values below kr=8) with m=2, n=8 and
// extended_weights enabled.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40724
// Sweeps k over 1..7 with m=2, n=8, extended_weights enabled and a_stride
// set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .a_stride(11)
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40741
// Sweeps k over 1..7, n over 1..8 and m over 1..2 with extended_weights
// enabled and iterations set to 1 for each combination.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40762
// Sweeps k over 9..15 (strictly between kr=8 and 16) with m=2, n=8 and
// extended_weights enabled.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40778
// Sweeps k over 9..15 with m=2, n=8, extended_weights enabled and a_stride
// set to 19.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .a_stride(19)
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40795
// Sweeps k over 9..15, n over 1..8 and m over 1..2 with extended_weights
// enabled and iterations set to 1 for each combination.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40816
// Sweeps k over the multiples of 8 from 16 through 80 with m=2, n=8 and
// extended_weights enabled.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40832
// Sweeps k over the multiples of 8 from 16 through 80 with m=2, n=8,
// extended_weights enabled and a_stride set to 83.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .extended_weights(true)
        .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(2).n(8).k(k_dim)       // m/n/k under test
        .a_stride(83)
        .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
  }
}
40849
// Sweeps k over the multiples of 8 from 16 through 80, n over 1..8 and m over
// 1..2 with extended_weights enabled and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40870
// Sweeps n over 9..15 (strictly between nr=8 and 16) and k over
// {1, 10, 19, 28, 37} with m=2 and extended_weights enabled.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40888
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m=2,
// extended_weights enabled and cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .cn_stride(11)
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40907
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m=2,
// extended_weights enabled and a_stride set to 43.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .a_stride(43)
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40926
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..2 with
// extended_weights enabled and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40947
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=2 and
// extended_weights enabled.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40965
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=2,
// extended_weights enabled and cn_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .cn_stride(11)
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40984
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=2,
// extended_weights enabled and a_stride set to 43.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .extended_weights(true)
          .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(2).n(n_dim).k(k_dim)   // m/n/k under test
          .a_stride(43)
          .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
41003
// Sweeps n over {16, 24}, k over {1, 10, 19, 28, 37} and m over 1..2 with
// extended_weights enabled and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
41024
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..2 with
// extended_weights enabled, cm_stride set to 11 and iterations set to 1.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .extended_weights(true)
            .mr(2).nr(8).kr(8).sr(1)      // mr/nr/kr/sr configuration
            .m(m_dim).n(n_dim).k(k_dim)   // m/n/k under test
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
41046
// Single extended-weights run with m=2, n=8, k=8 and cm_stride set to 11.
TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .extended_weights(true)
      .mr(2).nr(8).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(2).n(8).k(8)           // m/n/k under test
      .cm_stride(11)
      .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
}
41061 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
41062
41063
41064 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=3, n=16 (equal to mr/nr) and k=8 (equal to kr).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(16).k(8)           // m/n/k under test
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
41077
// Single m=3, n=16, k=8 run with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(16).k(8)           // m/n/k under test
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
41091
// Single m=3, n=16, k=8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)  // mr/nr/kr/sr configuration
      .m(3).n(16).k(8)           // m/n/k under test
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
41105
// With k fixed at 8, visits every (n, m) pair for n in 1..16 and m in 1..3,
// with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)  // mr/nr/kr/sr configuration
          .m(m_dim).n(n_dim).k(8)    // m/n/k under test
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
41123
// With n=16 and k=8 fixed, sweeps m over 1..3 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)  // mr/nr/kr/sr configuration
        .m(m_dim).n(16).k(8)       // m/n/k under test
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
41139
// 3x16c8 AVX512SKX kernel at k=8, m=3: n swept over [1, 16] with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41155
// 3x16c8 AVX512SKX kernel, m=3, n=16: k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41170
// 3x16c8 AVX512SKX kernel, m=3, n=16: k swept over 1..7 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41186
// 3x16c8 AVX512SKX kernel: k in 1..7, n in [1, 16], m in [1, 3] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41206
// 3x16c8 AVX512SKX kernel, m=3, n=16: k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41221
// 3x16c8 AVX512SKX kernel, m=3, n=16: k swept over 9..15 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41237
// 3x16c8 AVX512SKX kernel: k in 9..15, n in [1, 16], m in [1, 3] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41257
// 3x16c8 AVX512SKX kernel, m=3, n=16: k takes 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41272
// 3x16c8 AVX512SKX kernel, m=3, n=16: k takes 16, 24, ..., 80 (step 8) with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_val)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
41288
// 3x16c8 AVX512SKX kernel: k in 16..80 step 8, n in [1, 16], m in [1, 3] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41308
// 3x16c8 AVX512SKX kernel, m=3: n swept over 17..31, and for each n, k in 1..40 step 9.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41325
// 3x16c8 AVX512SKX kernel, m=3: n in 17..31, k in 1..40 step 9, with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41343
// 3x16c8 AVX512SKX kernel, m=3: n in 17..31, k in 1..40 step 9, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41361
// 3x16c8 AVX512SKX kernel: n in 17..31, k in 1..40 step 9, m in [1, 3] (n outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41381
// 3x16c8 AVX512SKX kernel, m=3: n takes 32 and 48 (step 16), k in 1..40 step 9.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41398
// 3x16c8 AVX512SKX kernel, m=3: n takes 32 and 48, k in 1..40 step 9, with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41416
// 3x16c8 AVX512SKX kernel, m=3: n takes 32 and 48, k in 1..40 step 9, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41434
// 3x16c8 AVX512SKX kernel: n takes 32 and 48, k in 1..40 step 9, m in [1, 3] (n outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41454
// 3x16c8 AVX512SKX kernel: k in 1..40 step 9, n in [1, 16], m in [1, 3] (k outermost),
// each combination run with cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41475
// 3x16c8 AVX512SKX kernel: m=3, n=16, k=8 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
41489
// 3x16c8 AVX512SKX kernel: m=3, n=16, k=8 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
41503
// 3x16c8 AVX512SKX kernel: m=3, n=16, k=8 with cm_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
41517 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
41518
41519
41520 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41532
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41545
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41558
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel at k=8: n in [1, 4], m in [1, 1],
// each run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41575
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel at k=8, n=4: m swept over [1, 1] with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41590
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel at k=8, m=1: n swept over [1, 4] with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41605
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41619
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k swept over 1..7 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41634
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: k in 1..7, n in [1, 4], m in [1, 1] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41653
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41667
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k swept over 9..15 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41682
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: k in 9..15, n in [1, 4], m in [1, 1] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41701
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k takes 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41715
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1, n=4: k takes 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41730
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: k in 16..80 step 8, n in [1, 4], m in [1, 1]
// (k outermost), each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41749
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n swept over 5..7, k in 1..40 step 9.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41765
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n in 5..7, k in 1..40 step 9, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41782
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n in 5..7, k in 1..40 step 9, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41799
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: n in 5..7, k in 1..40 step 9, m in [1, 1]
// (n outermost), each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41818
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n takes 8 and 12 (step 4), k in 1..40 step 9.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41834
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n takes 8 and 12, k in 1..40 step 9,
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41851
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel, m=1: n takes 8 and 12, k in 1..40 step 9,
// with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41868
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: n takes 8 and 12, k in 1..40 step 9, m in [1, 1]
// (n outermost), each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41887
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: k in 1..40 step 9, n in [1, 4], m in [1, 1]
// (k outermost), each combination run with cm_stride(7) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41907
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41920
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41933
// 1x4c2s4 WAsm SIMD dot16x2 ld128 kernel: m=1, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41946 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41947
41948
41949 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel: m=1, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41961
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel: m=1, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41974
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel: m=1, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41987
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel at k=8: n in [1, 4], m in [1, 1],
// each run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42004
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel at k=8, n=4: m swept over [1, 1] with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42019
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel at k=8, m=1: n swept over [1, 4] with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42034
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel, m=1, n=4: k swept over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42048
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel, m=1, n=4: k swept over 1..7 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42063
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel: k in 1..7, n in [1, 4], m in [1, 1] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42082
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel, m=1, n=4: k swept over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42096
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel, m=1, n=4: k swept over 9..15 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42111
// 1x4c8 WAsm SIMD dot16x2 ld64 kernel: k in 9..15, n in [1, 4], m in [1, 1] (k outermost),
// each combination run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42130
// Sweeps k over 16, 24, ..., 80 with m = 1 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42144
// Sweeps k over 16, 24, ..., 80 with m = 1, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42159
// Sweeps k over 16, 24, ..., 80, n in [1, 4] and m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42178
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42194
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42211
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42228
// Sweeps n in [5, 8), k over 1, 10, ..., 37 and m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42247
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42263
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 1 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42280
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 1 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42297
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m in [1, 1]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42316
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 1] with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42336
// Single run at m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42349
// Single run at m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42362
// Single run at m = 1, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42375 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42376
42377
42378 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42390
// Single run at m = 2, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42403
// Single run at m = 2, n = 4, k = 8 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42416
// Sweeps n in [1, 4] and m in [1, 2] at k = 8; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42433
// Sweeps m in [1, 2] at n = 4, k = 8; one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42448
// Sweeps n in [1, 4] at m = 2, k = 8; one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42463
// Sweeps k in [1, 8) with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42477
// Sweeps k in [1, 8) with m = 2, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42492
// Sweeps k in [1, 8), n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42511
// Sweeps k in [9, 16) with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42525
// Sweeps k in [9, 16) with m = 2, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42540
// Sweeps k in [9, 16), n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42559
// Sweeps k over 16, 24, ..., 80 with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42573
// Sweeps k over 16, 24, ..., 80 with m = 2, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42588
// Sweeps k over 16, 24, ..., 80, n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42607
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42623
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42640
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42657
// Sweeps n in [5, 8), k over 1, 10, ..., 37 and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42676
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42692
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 2 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42709
// Sweeps n over 8 and 12, and k over 1, 10, ..., 37, with m = 2 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_dim).k(k_dim);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42726
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42745
// Sweeps k over 1, 10, ..., 37, n in [1, 4] and m in [1, 2] with cm_stride = 7; one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42765
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42778
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42791
// Single run at m = 2, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42804 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42805
42806
42807 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42819
// Single run at m = 2, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42832
// Single run at m = 2, n = 4, k = 8 with a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42845
// Sweeps n in [1, 4] and m in [1, 2] at k = 8; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42862
// Sweeps m in [1, 2] at n = 4, k = 8; one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42877
// Sweeps n in [1, 4] at m = 2, k = 8; one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42892
// Sweeps k in [1, 8) with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42906
// Sweeps k in [1, 8) with m = 2, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42921
// Sweeps k in [1, 8), n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42940
// Sweeps k in [9, 16) with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42954
// Sweeps k in [9, 16) with m = 2, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42969
// Sweeps k in [9, 16), n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42988
// Sweeps k over 16, 24, ..., 80 with m = 2 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43002
// Sweeps k over 16, 24, ..., 80 with m = 2, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43017
// Sweeps k over 16, 24, ..., 80, n in [1, 4] and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43036
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43052
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43069
// Sweeps n in [5, 8) and k over 1, 10, ..., 37 with m = 2 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43086
// Sweeps n in [5, 8), k over 1, 10, ..., 37 and m in [1, 2]; one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43105
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43121
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43138
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 2 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43155
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2, with
// iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43174
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43194
// Single m=2, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43207
// Single m=2, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43220
// Single m=2, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43233 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43234
43235
43236 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m=3, n=4, k=8 configuration with no extra strides or clamping set.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43248
// Single m=3, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43261
// Single m=3, n=4, k=8 configuration with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43274
// Sweeps n over 1..4 and m over 1..3 at k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43291
// Sweeps m over 1..3 at n=4, k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43306
// Sweeps n over 1..4 at m=3, k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43321
// Sweeps k over 1..7 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43335
// Sweeps k over 1..7 with m=3, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43350
// Sweeps k over 1..7, n over 1..4 and m over 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43369
// Sweeps k over 9..15 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43383
// Sweeps k over 9..15 with m=3, n=4 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43398
// Sweeps k over 9..15, n over 1..4 and m over 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43417
// Sweeps k over 16, 24, ..., 80 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43431
// Sweeps k over 16, 24, ..., 80 with m=3, n=4 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43446
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..3, with iterations
// set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43465
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43481
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43498
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43515
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43534
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43550
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43567
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43584
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43603
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43623
// Single m=3, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43636
// Single m=3, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43649
// Single m=3, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43662 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43663
43664
43665 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single m=3, n=4, k=8 configuration with no extra strides or clamping set.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43677
// Single m=3, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43690
// Single m=3, n=4, k=8 configuration with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43703
// Sweeps n over 1..4 and m over 1..3 at k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43720
// Sweeps m over 1..3 at n=4, k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43735
// Sweeps n over 1..4 at m=3, k=8, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43750
// Sweeps k over 1..7 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43764
// Sweeps k over 1..7 with m=3, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43779
// Sweeps k over 1..7, n over 1..4 and m over 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43798
// Sweeps k over 9..15 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43812
// Sweeps k over 9..15 with m=3, n=4 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43827
// Sweeps k over 9..15, n over 1..4 and m over 1..3, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43846
// Sweeps k over 16, 24, ..., 80 with m=3 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43860
// Sweeps k over 16, 24, ..., 80 with m=3, n=4 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43875
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..3, with iterations
// set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43894
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43910
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43927
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43944
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43963
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43979
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43996
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 3 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44013
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1 for each configuration.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44032
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with
// cm_stride set to 7 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44052
// Single m=3, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
44065
// Single run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44078
// Single run with m = 3, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44091 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44092
44093
44094 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 3, n = 4, k = 8 and no explicit strides.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44106
// Single run with m = 3, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44119
// Single run with m = 3, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44132
// Sweeps n over 1..4 and m over 1..3 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44149
// Sweeps m over 1..3 with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44164
// Sweeps n over 1..4 with m = 3 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44179
// Sweeps k over 1..7 with m = 3 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44193
// Sweeps k over 1..7 with m = 3, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44208
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44227
// Sweeps k over 9..15 with m = 3 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44241
// Sweeps k over 9..15 with m = 3, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44256
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44275
// Sweeps k over 16..80 in steps of 8 with m = 3 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44289
// Sweeps k over 16..80 in steps of 8 with m = 3, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44304
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44323
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44339
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44356
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44373
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44392
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44408
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44425
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44442
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44461
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3 with cm_stride = 7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44481
// Single run with m = 3, n = 4, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44494
// Single run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44507
// Single run with m = 3, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44520 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44521
44522
44523 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 4, n = 4, k = 8 and no explicit strides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44535
// Single run with m = 4, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44548
// Single run with m = 4, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44561
// Sweeps n over 1..4 and m over 1..4 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44578
// Sweeps m over 1..4 with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44593
// Sweeps n over 1..4 with m = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44608
// Sweeps k over 1..7 with m = 4 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44622
// Sweeps k over 1..7 with m = 4, n = 4 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44637
// Sweeps k over 1..7, n over 1..4 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44656
// Sweeps k over 9..15 with m = 4 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44670
// Sweeps k over 9..15 with m = 4, n = 4 and a_stride = 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44685
// Sweeps k over 9..15, n over 1..4 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44704
// Sweeps k over 16..80 in steps of 8 with m = 4 and n = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44718
// Sweeps k over 16..80 in steps of 8 with m = 4, n = 4 and a_stride = 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
44733
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44752
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44768
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 4 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44785
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 4 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44802
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44821
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44837
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 4 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44854
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 4 and a_stride = 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44871
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44890
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..4 with cm_stride = 7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44910
// Single run with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44923
// Single run with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44936
// Single run with m = 4, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44949 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44950
44951
44952 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 4, n = 4, k = 8 and no explicit strides.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44964
// Single run with m = 4, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44977
// Single run with m = 4, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
44990
// Sweeps n over 1..4 and m over 1..4 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45007
// Sweeps m over 1..4 with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45022
// Sweeps n over [1, 4] with m=4 and k=8, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45037
// Sweeps k over [1, 7] with the full m=4, n=4 tile.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45051
// Sweeps k over [1, 7] with m=4, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45066
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 4], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45085
// Sweeps k over [9, 15] with the full m=4, n=4 tile.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45099
// Sweeps k over [9, 15] with m=4, n=4 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45114
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 4], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45133
// Sweeps k over 16, 24, ..., 80 (step 8) with the full m=4, n=4 tile.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45147
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4, n=4 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(8).sr(1);
    tester.m(4).n(4).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
45162
// Sweeps k over 16..80 (step 8), n over [1, 4] and m over [1, 4], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45181
// Sweeps n over [5, 7] and k over 1..40 (step 9) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45197
// Sweeps n over [5, 7] and k over 1..40 (step 9) with m=4 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45214
// Sweeps n over [5, 7] and k over 1..40 (step 9) with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45231
// Sweeps n over [5, 7], k over 1..40 (step 9) and m over [1, 4], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45250
// Sweeps n over 8 and 12 (step 4) and k over 1..40 (step 9) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45266
// Sweeps n over 8 and 12 (step 4) and k over 1..40 (step 9) with m=4 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45283
// Sweeps n over 8 and 12 (step 4) and k over 1..40 (step 9) with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45300
// Sweeps n over 8 and 12, k over 1..40 (step 9) and m over [1, 4], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45319
// Sweeps k over 1..40 (step 9), n over [1, 4] and m over [1, 4] with cm_stride set to 7 and
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45339
// Runs the kernel once with m=4, n=4, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
45352
// Runs the kernel once with m=4, n=4, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
45365
// Runs the kernel once with m=4, n=4, k=8 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(8).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
45378 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45379
45380
45381 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 1x4 wasm fmagic kernel once with m=1, n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45393
// Runs the kernel once with m=1, n=4, k=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45406
// Runs the kernel once with m=1, n=4, k=1 and a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45419
// Sweeps n over [1, 4] and m over [1, 1] at k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45436
// Sweeps m over [1, 1] with n=4 and k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45451
// Sweeps n over [1, 4] with m=1 and k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(cols).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45466
// Sweeps k over [2, 9] with m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45480
// Sweeps k over [2, 9] with m=1, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45495
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 1], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45514
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45530
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45547
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45564
// Sweeps n over [5, 7], k over 1, 3, 5 and m over [1, 1], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45583
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45599
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45616
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45633
// Sweeps n over 8 and 12, k over 1, 3, 5 and m over [1, 1], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45652
// Sweeps k over 1, 3, 5, n over [1, 4] and m over [1, 1] with cm_stride set to 7 and
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45672
// Runs the kernel once with m=1, n=4, k=1 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45685
// Runs the kernel once with m=1, n=4, k=1 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45698
// Runs the kernel once with m=1, n=4, k=1 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45711 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45712
45713
45714 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 2x4 wasm fmagic kernel once with m=2, n=4, k=1.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45726
// Runs the kernel once with m=2, n=4, k=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45739
// Runs the kernel once with m=2, n=4, k=1 and a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45752
// Sweeps n over [1, 4] and m over [1, 2] at k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45769
// Sweeps m over [1, 2] with n=4 and k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45784
// Sweeps n over [1, 4] with m=2 and k=1, using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(cols).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45799
// Sweeps k over [2, 9] with m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45813
// Sweeps k over [2, 9] with m=2, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45828
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 2], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45847
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45863
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=2 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45880
// Sweeps n over [5, 7] and k over 1, 3, 5 with m=2 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45897
// Sweeps n over [5, 7], k over 1, 3, 5 and m over [1, 2], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45916
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45932
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=2 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45949
// Sweeps n over 8 and 12 (step 4) and k over 1, 3, 5 with m=2 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45966
// Sweeps n over 8 and 12, k over 1, 3, 5 and m over [1, 2], using iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45985
// Sweeps k over 1, 3, 5, n over [1, 4] and m over [1, 2] with cm_stride set to 7 and
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46005
// Runs the kernel once with m=2, n=4, k=1 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46018
// Runs the kernel once with m=2, n=4, k=1 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46031
// Runs the kernel once with m=2, n=4, k=1 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46044 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
46045
46046
46047 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x2 wasm fmagic kernel: full 4x2 tile with k = 1.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46059
// 4x2 wasm fmagic kernel: full 4x2 tile, k = 1, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46072
// 4x2 wasm fmagic kernel: full 4x2 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .a_stride(3)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46085
// 4x2 wasm fmagic kernel: k = 1, every n in [1, 2] and m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    for (uint32_t rows = 1; rows < 5; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46102
// 4x2 wasm fmagic kernel: k = 1, n = 2, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows < 5; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46117
// 4x2 wasm fmagic kernel: k = 1, m = 4, every n in [1, 2],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46132
// 4x2 wasm fmagic kernel: full 4x2 tile for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46146
// 4x2 wasm fmagic kernel: full 4x2 tile for every k in [2, 9], with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(depth)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46161
// 4x2 wasm fmagic kernel: every k in [2, 9], n in [1, 2] and m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46180
// 4x2 wasm fmagic kernel: m = 4, n = 3 (the only value the loop visits),
// k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46196
// 4x2 wasm fmagic kernel: m = 4, n = 3, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46213
// 4x2 wasm fmagic kernel: m = 4, n = 3, k in {1, 3, 5}, with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46230
// 4x2 wasm fmagic kernel: n = 3, k in {1, 3, 5}, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46249
// 4x2 wasm fmagic kernel: m = 4, n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46265
// 4x2 wasm fmagic kernel: m = 4, n in {4, 6}, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46282
// 4x2 wasm fmagic kernel: m = 4, n in {4, 6}, k in {1, 3, 5}, with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_a) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46299
// 4x2 wasm fmagic kernel: n in {4, 6}, k in {1, 3, 5}, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46318
// 4x2 wasm fmagic kernel: k in {1, 3, 5}, every n in [1, 2] and m in [1, 4],
// with cm_stride(5) and one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46338
// 4x2 wasm fmagic kernel: full 4x2 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46351
// 4x2 wasm fmagic kernel: full 4x2 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46364
// 4x2 wasm fmagic kernel: full 4x2 tile, k = 1, with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46377 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
46378
46379
46380 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x4 wasm fmagic kernel: full 4x4 tile with k = 1.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46392
// 4x4 wasm fmagic kernel: full 4x4 tile, k = 1, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46405
// 4x4 wasm fmagic kernel: full 4x4 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46418
// 4x4 wasm fmagic kernel: k = 1, every n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 5; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46435
// 4x4 wasm fmagic kernel: k = 1, n = 4, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows < 5; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46450
// 4x4 wasm fmagic kernel: k = 1, m = 4, every n in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46465
// 4x4 wasm fmagic kernel: full 4x4 tile for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46479
// 4x4 wasm fmagic kernel: full 4x4 tile for every k in [2, 9], with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46494
// 4x4 wasm fmagic kernel: every k in [2, 9], n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46513
// 4x4 wasm fmagic kernel: m = 4, every n in [5, 7], k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46529
// 4x4 wasm fmagic kernel: m = 4, every n in [5, 7], k in {1, 3, 5},
// with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46546
// 4x4 wasm fmagic kernel: m = 4, every n in [5, 7], k in {1, 3, 5},
// with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46563
// 4x4 wasm fmagic kernel: every n in [5, 7], k in {1, 3, 5}, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46582
// 4x4 wasm fmagic kernel: m = 4, n in {8, 12}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46598
// 4x4 wasm fmagic kernel: m = 4, n in {8, 12}, k in {1, 3, 5}, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46615
// 4x4 wasm fmagic kernel: m = 4, n in {8, 12}, k in {1, 3, 5}, with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46632
// 4x4 wasm fmagic kernel: n in {8, 12}, k in {1, 3, 5}, every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46651
// 4x4 wasm fmagic kernel: k in {1, 3, 5}, every n in [1, 4] and m in [1, 4],
// with cm_stride(7) and one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 5; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46671
// 4x4 wasm fmagic kernel: full 4x4 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46684
// 4x4 wasm fmagic kernel: full 4x4 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46697
// 4x4 wasm fmagic kernel: full 4x4 tile, k = 1, with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46710 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
46711
46712
// 1x2 scalar fmagic kernel: full 1x2 tile with k = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46724
// 1x2 scalar fmagic kernel: full 1x2 tile, k = 1, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46737
// 1x2 scalar fmagic kernel: full 1x2 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .a_stride(3)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46750
// 1x2 scalar fmagic kernel: k = 1, every n in [1, 2], m = 1 (the only value
// the m loop visits), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    for (uint32_t rows = 1; rows < 2; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46767
// 1x2 scalar fmagic kernel: k = 1, n = 2, m = 1 (the only value the loop
// visits), one iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows < 2; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46782
// 1x2 scalar fmagic kernel: k = 1, m = 1, every n in [1, 2],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46797
// 1x2 scalar fmagic kernel: full 1x2 tile for every k in [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(depth)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46811
// 1x2 scalar fmagic kernel: full 1x2 tile for every k in [2, 9], with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(depth)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46826
// 1x2 scalar fmagic kernel: every k in [2, 9] and n in [1, 2], m = 1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth <= 9; ++depth) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46845
// 1x2 scalar fmagic kernel: m = 1, n = 3 (the only value the loop visits),
// k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46861
// 1x2 scalar fmagic kernel: m = 1, n = 3, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46878
// 1x2 scalar fmagic kernel: m = 1, n = 3, k in {1, 3, 5}, with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46895
// 1x2 scalar fmagic kernel: n = 3, k in {1, 3, 5}, m = 1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46914
// 1x2 scalar fmagic kernel: m = 1, n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46930
// 1x2 scalar fmagic kernel: m = 1, n in {4, 6}, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46947
// 1x2 scalar fmagic kernel: m = 1, n in {4, 6}, k in {1, 3, 5}, with a_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46964
// 1x2 scalar fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m = 1,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46983
// 1x2 scalar fmagic kernel: k in {1, 3, 5}, every n in [1, 2], m = 1,
// with cm_stride(5) and one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47003
// 1x2 scalar fmagic kernel: full 1x2 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47016
// 1x2 scalar fmagic kernel: full 1x2 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47029
// 1x2 scalar fmagic kernel: full 1x2 tile, k = 1, with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47042
47043
// 1x4 scalar fmagic kernel: full 1x4 tile with k = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47055
// 1x4 scalar fmagic kernel: full 1x4 tile, k = 1, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47068
// Runs the 1x4 scalar fmagic kernel on m=1, n=4, k=1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47081
// Sweeps every (n, m) with n in [1, 4] and m in [1, 1] at k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47098
// Sweeps m in [1, 1] with n=4 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47113
// Sweeps n in [1, 4] with m=1 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47128
// Sweeps k in [2, 9] with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47142
// Sweeps k in [2, 9] with m=1, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47157
// Sweeps k in [2, 9], n in [1, 4] and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47176
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47192
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47209
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47226
// Sweeps n in [5, 7], k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47245
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47261
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47278
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47295
// Sweeps n in {8, 12}, k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47314
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 1] with cm_stride set to 7, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47334
// Runs the 1x4 scalar fmagic kernel on m=1, n=4, k=1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47347
// Runs the 1x4 scalar fmagic kernel on m=1, n=4, k=1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47360
// Runs the 1x4 scalar fmagic kernel on m=1, n=4, k=1 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47373
47374
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47386
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47399
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47412
// Sweeps every (n, m) with n in [1, 4] and m in [1, 1] at k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47429
// Sweeps m in [1, 1] with n=4 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47444
// Sweeps n in [1, 4] with m=1 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47459
// Sweeps k in [2, 9] with m=1 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47473
// Sweeps k in [2, 9] with m=1, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47488
// Sweeps k in [2, 9], n in [1, 4] and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47507
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47523
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47540
// Sweeps n in [5, 7] and k in {1, 3, 5} with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47557
// Sweeps n in [5, 7], k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47576
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47592
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47609
// Sweeps n in {8, 12} and k in {1, 3, 5} with m=1 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47626
// Sweeps n in {8, 12}, k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47645
// Sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 1] with cm_stride set to 7, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47665
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47678
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47691
// Runs the 1x4 scalar imagic kernel on m=1, n=4, k=1 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
47704
47705
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47717
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47730
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47743
// Sweeps every (n, m) with n in [1, 2] and m in [1, 2] at k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47760
// Sweeps m in [1, 2] with n=2 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47775
// Sweeps n in [1, 2] with m=2 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47790
// Sweeps k in [2, 9] with m=2 and n=2.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47804
// Sweeps k in [2, 9] with m=2, n=2 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47819
// Sweeps k in [2, 9], n in [1, 2] and m in [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47838
// Sweeps n in [3, 3] and k in {1, 3, 5} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47854
// Sweeps n in [3, 3] and k in {1, 3, 5} with m=2 and cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47871
// Sweeps n in [3, 3] and k in {1, 3, 5} with m=2 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47888
// Sweeps n in [3, 3], k in {1, 3, 5} and m in [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47907
// Sweeps n in {4, 6} and k in {1, 3, 5} with m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47923
// Sweeps n in {4, 6} and k in {1, 3, 5} with m=2 and cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47940
// Sweeps n in {4, 6} and k in {1, 3, 5} with m=2 and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47957
// Sweeps n in {4, 6}, k in {1, 3, 5} and m in [1, 2], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47976
// Sweeps k in {1, 3, 5}, n in [1, 2] and m in [1, 2] with cm_stride set to 5, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47996
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48009
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48022
// Runs the 2x2 scalar fmagic kernel on m=2, n=2, k=1 with cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48035
48036
// Runs the 3x4 scalar fmagic kernel on m=3, n=4, k=1 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48048
// Runs the 3x4 scalar fmagic kernel on m=3, n=4, k=1 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48061
// Runs the 3x4 scalar fmagic kernel on m=3, n=4, k=1 with a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
48074
// Sweeps every (n, m) with n in [1, 4] and m in [1, 3] at k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
48091
// 3x4 scalar fmagic kernel: m swept over [1, 3] with n == 4 and k == 1, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc < 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(mc).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
48106
// 3x4 scalar fmagic kernel: n swept over [1, 4] with m == 3 and k == 1, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc < 5; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
48121
// 3x4 scalar fmagic kernel: full tile with k swept over [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
48135
// 3x4 scalar fmagic kernel: full tile, k swept over [2, 9], a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
48150
// 3x4 scalar fmagic kernel: k in [2, 9] crossed with n in [1, 4] and m in [1, 3], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48169
// 3x4 scalar fmagic kernel: n in [5, 7] with k in {1, 3, 5} and m == 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48185
// 3x4 scalar fmagic kernel: n in [5, 7], k in {1, 3, 5}, m == 3, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48202
// 3x4 scalar fmagic kernel: n in [5, 7], k in {1, 3, 5}, m == 3, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48219
// 3x4 scalar fmagic kernel: n in [5, 7], k in {1, 3, 5}, m in [1, 3], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc < 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48238
// 3x4 scalar fmagic kernel: n in {8, 12} with k in {1, 3, 5} and m == 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48254
// 3x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m == 3, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48271
// 3x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m == 3, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48288
// 3x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m in [1, 3], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc < 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48307
// 3x4 scalar fmagic kernel: k in {1, 3, 5}, n in [1, 4], m in [1, 3], cm_stride set to 7, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48327
// 3x4 scalar fmagic kernel: full tile, k == 1, qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
48340
// 3x4 scalar fmagic kernel: full tile, k == 1, qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
48353
// 3x4 scalar fmagic kernel: full tile, k == 1, cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
48366
48367
// 4x2 scalar lrintf kernel: m == mr, n == nr, k == 1.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48379
// 4x2 scalar lrintf kernel: full tile, k == 1, cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48392
// 4x2 scalar lrintf kernel: full tile, k == 1, a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48405
// 4x2 scalar lrintf kernel: every (m, n) with m in [1, 4] and n in [1, 2] at k == 1, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc < 3; ++nc) {
    for (uint32_t mc = 1; mc < 5; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48422
// 4x2 scalar lrintf kernel: m swept over [1, 4] with n == 2 and k == 1, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc < 5; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48437
// 4x2 scalar lrintf kernel: n swept over [1, 2] with m == 4 and k == 1, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc < 3; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48452
// 4x2 scalar lrintf kernel: full tile with k swept over [2, 9].
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48466
// 4x2 scalar lrintf kernel: full tile, k swept over [2, 9], a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48481
// 4x2 scalar lrintf kernel: k in [2, 9] crossed with n in [1, 2] and m in [1, 4], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48500
// 4x2 scalar lrintf kernel: n == 3 (the only value in the loop range) with k in {1, 3, 5} and m == 4.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48516
// 4x2 scalar lrintf kernel: n == 3, k in {1, 3, 5}, m == 4, cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48533
// 4x2 scalar lrintf kernel: n == 3, k in {1, 3, 5}, m == 4, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48550
// 4x2 scalar lrintf kernel: n == 3, k in {1, 3, 5}, m in [1, 4], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48569
// 4x2 scalar lrintf kernel: n in {4, 6} with k in {1, 3, 5} and m == 4.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48585
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m == 4, cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48602
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m == 4, a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48619
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m in [1, 4], iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48638
// 4x2 scalar lrintf kernel: k in {1, 3, 5}, n in [1, 2], m in [1, 4], cm_stride set to 5, iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48658
// 4x2 scalar lrintf kernel: full tile, k == 1, qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48671
// 4x2 scalar lrintf kernel: full tile, k == 1, qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48684
// 4x2 scalar lrintf kernel: full tile, k == 1, cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
48697
48698
48699 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Generated 4x8 AArch32 NEONv8 kernel: m == mr, n == nr, k == 8.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
48712
// Generated 4x8 AArch32 NEONv8 kernel: full tile, k == 8, cn_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
48726
// Generated 4x8 AArch32 NEONv8 kernel: full tile, k == 8, a_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
48740
// Generated 4x8 AArch32 NEONv8 kernel: every (m, n) with m in [1, 4] and n in [1, 8] at k == 8, iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    for (uint32_t mc = 1; mc < 5; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48758
// Generated 4x8 AArch32 NEONv8 kernel: m swept over [1, 4] with n == 8 and k == 8, iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc < 5; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48774
// Generated 4x8 AArch32 NEONv8 kernel: n swept over [1, 8] with m == 4 and k == 8, iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48790
// Generated 4x8 AArch32 NEONv8 kernel: full tile with k swept over [1, 7].
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48805
// Generated 4x8 AArch32 NEONv8 kernel: full tile, k swept over [1, 7], a_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(11);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48821
// Generated 4x8 AArch32 NEONv8 kernel: k in [1, 7] crossed with n in [1, 8] and m in [1, 4], iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48841
// Generated 4x8 AArch32 NEONv8 kernel: full tile with k swept over [9, 15].
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48856
// Generated 4x8 AArch32 NEONv8 kernel: full tile, k swept over [9, 15], a_stride set to 19.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(19);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48872
// Generated 4x8 AArch32 NEONv8 kernel: k in [9, 15] crossed with n in [1, 8] and m in [1, 4], iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48892
// Generated 4x8 AArch32 NEONv8 kernel: full tile with k stepping by 8 from 16 through 80.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48907
// Generated 4x8 AArch32 NEONv8 kernel: full tile, k stepping by 8 from 16 through 80, a_stride set to 83.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.a_stride(83);
    tester.Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
48923
// Generated 4x8 AArch32 NEONv8 kernel: k stepping by 8 from 16 through 80, n in [1, 8], m in [1, 4], iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48943
// Generated 4x8 AArch32 NEONv8 kernel: n in [9, 15] with k stepping by 9 from 1 up to 40 and m == 4.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48960
// Generated 4x8 AArch32 NEONv8 kernel: n in [9, 15], k stepping by 9 from 1 up to 40, m == 4, cn_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48978
// Generated 4x8 AArch32 NEONv8 kernel: n in [9, 15], k stepping by 9 from 1 up to 40, m == 4, a_stride set to 43.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48996
// Generated 4x8 AArch32 NEONv8 kernel: n in [9, 15], k stepping by 9 from 1 up to 40, m in [1, 4], iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49016
// Generated 4x8 AArch32 NEONv8 kernel: n in {16, 24} with k stepping by 9 from 1 up to 40 and m == 4.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49033
// Generated 4x8 AArch32 NEONv8 kernel: n in {16, 24}, k stepping by 9 from 1 up to 40, m == 4, cn_stride set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49051
// Generated 4x8 AArch32 NEONv8 kernel: n in {16, 24}, k stepping by 9 from 1 up to 40, m == 4, a_stride set to 43.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49069
// Generated 4x8 AArch32 NEONv8 kernel: n in {16, 24}, k stepping by 9 from 1 up to 40, m in [1, 4], iterations(1).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49089
// strided_cm_subtile: k in 1, 10, 19, 28, 37 crossed with every n in 1..8 and
// every m in 1..4, using cm_stride = 11 and iterations = 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49110
// qmin: single 4x8 case with k = 8 and qmin set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49124
// qmax: single 4x8 case with k = 8 and qmax set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49138
// strided_cm: single 4x8 case with k = 8 and cm_stride set to 11 (larger than
// n = 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49152 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
49153
49154
49155 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// k_eq_8: single m = 4, n = 8 case with k = 8 (kr = 4). Gated by
// TEST_REQUIRES_ARM_NEON_DOT.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49168
// strided_cn: single m = 4, n = 8, k = 8 case with cn_stride set to 11
// (larger than nr = 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49182
// k_eq_8_strided_a: single m = 4, n = 8, k = 8 case with a_stride set to 11
// (greater than k).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49196
// k_eq_8_subtile: k fixed at 8; every (m, n) pair with n in 1..8 and m in
// 1..4, iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49214
// k_eq_8_subtile_m: n = 8 and k = 8 held fixed while m runs over 1..4;
// iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49230
// k_eq_8_subtile_n: m = 4 and k = 8 held fixed while n runs over 1..8;
// iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49246
// k_lt_8: m = 4, n = 8 with every k from 1 through 7.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49261
// k_lt_8_strided_a: m = 4, n = 8 with every k from 1 through 7 and a_stride
// set to 11.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49277
// k_lt_8_subtile: every k in 1..7 crossed with every n in 1..8 and every m in
// 1..4; iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49297
// k_gt_8: m = 4, n = 8 with every k from 9 through 15.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49312
// k_gt_8_strided_a: m = 4, n = 8 with every k from 9 through 15 and a_stride
// set to 19 (greater than every k tested).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49328
// k_gt_8_subtile: every k in 9..15 crossed with every n in 1..8 and every m
// in 1..4; iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49348
// k_div_8: m = 4, n = 8 with k stepping through the multiples of 8 from 16 to
// 80 inclusive.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49363
// k_div_8_strided_a: m = 4, n = 8 with k in 16, 24, ..., 80 and a_stride set
// to 83 (greater than every k tested).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
49379
// k_div_8_subtile: k in 16, 24, ..., 80 crossed with every n in 1..8 and
// every m in 1..4; iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49399
// n_gt_8: m = 4 with every n from 9 through 15; k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49416
// n_gt_8_strided_cn: n in 9..15 and k in 1, 10, 19, 28, 37 with cn_stride set
// to 11 (larger than nr = 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49434
// n_gt_8_strided_a: n in 9..15 and k in 1, 10, 19, 28, 37 with a_stride set
// to 43 (greater than every k tested).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49452
// n_gt_8_subtile: n in 9..15, k in 1, 10, 19, 28, 37, and every m in 1..4;
// iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49472
// n_div_8: m = 4 with n in {16, 24} (multiples of nr = 8); k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49489
// n_div_8_strided_cn: n in {16, 24} and k in 1, 10, 19, 28, 37 with cn_stride
// set to 11 (larger than nr = 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49507
// n_div_8_strided_a: n in {16, 24} and k in 1, 10, 19, 28, 37 with a_stride
// set to 43 (greater than every k tested).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49525
// n_div_8_subtile: n in {16, 24}, k in 1, 10, 19, 28, 37, and every m in
// 1..4; iterations set to 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49545
// strided_cm_subtile: k in 1, 10, 19, 28, 37 crossed with every n in 1..8 and
// every m in 1..4, using cm_stride = 11 and iterations = 1.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49566
// qmin: single m = 4, n = 8, k = 8 case with qmin set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49580
// qmax: single m = 4, n = 8, k = 8 case with qmax set to 128.
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49594
// strided_cm: single m = 4, n = 8, k = 8 case with cm_stride set to 11
// (larger than n = 8).
TEST(GENERATE_QC8_GEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_generate_qc8_gemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
49608 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
49609